diff --git a/CMakeLists.txt b/CMakeLists.txt index 987e4ae709c..c4da105cac2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,10 @@ else() set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2") endif() +if (ENABLE_PYTHON) + add_compile_definitions(ENABLE_PYTHON) +endif() + set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -g2 -ggdb -fno-inline-functions -fno-omit-frame-pointer -Wl,--allow-shlib-undefined -D_LIBCPP_INLINE_VISIBILITY='' -D'_LIBCPP_EXTERN_TEMPLATE(...)=' -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2 -Wno-cpp") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I/usr/local/include -std=c++17 -Werror -Wall -Wno-deprecated-declarations -fPIC") diff --git a/RELEASE.md b/RELEASE.md index 4b829152a26..def72cbb206 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -70,6 +70,22 @@ Alexey Shevlyakov, avakh, baihuawei, BowenK, buxue, caifubi, caojian05, Cathy Wo Contributions of any kind are welcome! +# Release 0.3.1-alpha + +## Major Features and Improvements + +### Ascend 910 Training and Inference Framework +* Frontend and User Interface + * Independent model init interface. +* Data processing, augmentation, and save format + * Support sample padding for minddataset. 
+ +## Bugfixes +* Python API + * Fix bugs in the lars optimizer([!1894](https://gitee.com/mindspore/mindspore/pulls/1894)) +* Data processing + * Fix accuracy problem of RandomCropDecodeResize ([!2340](https://gitee.com/mindspore/mindspore/pulls/2340)) + # Release 0.3.0-alpha ## Major Features and Improvements diff --git a/build.sh b/build.sh index 059478b9afe..cfa657ff3ed 100755 --- a/build.sh +++ b/build.sh @@ -24,8 +24,8 @@ usage() { echo "Usage:" echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\" - echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" - echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E]" + echo " [-a on|off] [-Q on|off] [-S on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" + echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]" echo "" echo "Options:" echo " -d Debug mode" @@ -48,6 +48,7 @@ usage() echo " -P Enable dump anf graph to file in ProtoBuffer format, default on" echo " -Q Enable dump memory, default off" echo " -D Enable dumping of function graph ir, default on" + echo " -S Enable async data dump, default off" echo " -z Compile dataset & mindrecord, default on" echo " -M Enable MPI and NCCL for GPU training, gpu default on" echo " -V Specify the minimum required cuda version, default CUDA 10.1" @@ -56,6 +57,7 @@ usage() echo " -s Enable serving module, default off" echo " -B Enable debugger, default off" echo " -E Enable IBVERBS for parameter server, default off" + echo " -l Compile with python dependency, default on" } # check value of input is 'on' or 'off' @@ -87,6 +89,7 @@ checkopts() ENABLE_TIMELINE="off" ENABLE_DUMP2PROTO="on" ENABLE_DUMPE2E="off" + ENABLE_DATA_DUMP="off" ENABLE_DUMP_IR="on" COMPILE_MINDDATA="on" ENABLE_MPI="off" @@ -98,9 +101,10 @@ checkopts() ENABLE_SERVING="off" ENABLE_DEBUGGER="off" ENABLE_IBVERBS="off" + 
ENABLE_PYTHON="on" # Process the options - while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K:sB:E' opt + while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:S:D:zM:V:K:sB:E' opt do OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]') case "${opt}" in @@ -151,6 +155,10 @@ checkopts() check_on_off $OPTARG p ENABLE_PROFILE="$OPTARG" ;; + l) + check_on_off $OPTARG l + ENABLE_PYTHON="$OPTARG" + ;; i) INC_BUILD="on" ;; @@ -212,6 +220,11 @@ checkopts() ENABLE_DUMPE2E="$OPTARG" echo "enable dump end to end" ;; + S) + check_on_off $OPTARG S + ENABLE_DATA_DUMP="$OPTARG" + echo "enable data dump" + ;; D) check_on_off $OPTARG D ENABLE_DUMP_IR="$OPTARG" @@ -315,7 +328,11 @@ build_mindspore() if [[ "X$ENABLE_DUMPE2E" = "Xon" ]]; then CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON" fi + if [[ "X$ENABLE_DATA_DUMP" = "Xon" ]]; then + CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DATA_DUMP=ON" + fi CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}" + CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}" if [[ "X$ENABLE_MPI" = "Xon" ]]; then CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_MPI=ON" fi diff --git a/cmake/external_libs/icu4c.cmake b/cmake/external_libs/icu4c.cmake index 7d13e4fd2ad..af69328e55a 100644 --- a/cmake/external_libs/icu4c.cmake +++ b/cmake/external_libs/icu4c.cmake @@ -9,11 +9,11 @@ else() LIBS ${LIB_ICU_COMMON} ${LIB_ICU_DATA} ${LIB_ICU_I18N} URL https://github.com/unicode-org/icu/archive/release-67-1.tar.gz MD5 0c2662a2b0bc80b0eb56495205247c8f - CONFIGURE_COMMAND ./icu4c/source/runConfigureICU Linux --enable-rpath --disable-tests --disable-samples --disable-icuio --disable-extras ICU_DATA_FILTER_FILE=${CMAKE_SOURCE_DIR}/third_party/icu4c/filter.json + CONFIGURE_COMMAND ${CMAKE_SOURCE_DIR}/scripts/build_icu4c.sh ) include_directories(${icu4c_INC}) add_library(mindspore::icuuc ALIAS icu4c::${LIB_ICU_COMMON}) add_library(mindspore::icudata ALIAS icu4c::${LIB_ICU_DATA}) add_library(mindspore::icui18n ALIAS icu4c::${LIB_ICU_I18N}) add_definitions(-D ENABLE_ICU4C) -endif() \ 
No newline at end of file +endif() diff --git a/cmake/mind_expression.cmake b/cmake/mind_expression.cmake index 63a65cd533f..9002c239761 100644 --- a/cmake/mind_expression.cmake +++ b/cmake/mind_expression.cmake @@ -15,7 +15,7 @@ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/json.cmake) include(${CMAKE_SOURCE_DIR}/cmake/dependency_securec.cmake) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/protobuf.cmake) -if (ENABLE_DEBUGGER) +if (ENABLE_DEBUGGER OR ENABLE_SERVING) # build dependencies of gRPC include(${CMAKE_SOURCE_DIR}/cmake/external_libs/absl.cmake) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/c-ares.cmake) @@ -30,7 +30,7 @@ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/flatbuffers.cmake) if(USE_GLOG) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/glog.cmake) endif() -if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows") +if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows" AND NOT ENABLE_GE) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/zeromq.cmake) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/pslite.cmake) endif() diff --git a/cmake/options.cmake b/cmake/options.cmake index 18db942d681..2470c25a90c 100644 --- a/cmake/options.cmake +++ b/cmake/options.cmake @@ -19,6 +19,7 @@ option(ENABLE_MPI "enable mpi" OFF) option(ENABLE_AKG "enable akg" OFF) option(ENABLE_DEBUGGER "enable debugger" OFF) option(ENABLE_IBVERBS "enable IBVERBS for parameter server" OFF) +option(ENABLE_PYTHON "Enable python" ON) if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") if (WIN32) @@ -115,6 +116,10 @@ if(ENABLE_DUMP_E2E) add_compile_definitions(ENABLE_DUMP_E2E) endif() +if(ENABLE_DATA_DUMP) + add_compile_definitions(ENABLE_DATA_DUMP) +endif() + if(ENABLE_DEBUGGER) add_compile_definitions(ENABLE_DEBUGGER) endif() diff --git a/cmake/package.cmake b/cmake/package.cmake index 2fde01af4f2..7b3c2f7bb2e 100644 --- a/cmake/package.cmake +++ b/cmake/package.cmake @@ -213,7 +213,6 @@ install( ${CMAKE_SOURCE_DIR}/mindspore/parallel ${CMAKE_SOURCE_DIR}/mindspore/mindrecord 
${CMAKE_SOURCE_DIR}/mindspore/train - ${CMAKE_SOURCE_DIR}/mindspore/model_zoo ${CMAKE_SOURCE_DIR}/mindspore/common ${CMAKE_SOURCE_DIR}/mindspore/ops ${CMAKE_SOURCE_DIR}/mindspore/communication @@ -261,3 +260,17 @@ if (EXISTS ${CMAKE_SOURCE_DIR}/mindspore/dataset) COMPONENT mindspore ) endif () + +if (ENABLE_SERVING) + install( + TARGETS ms_serving + DESTINATION ${INSTALL_BASE_DIR} + COMPONENT mindspore + ) + + install( + TARGETS inference + DESTINATION ${INSTALL_LIB_DIR} + COMPONENT mindspore + ) +endif () diff --git a/config/data_dump.json b/config/data_dump.json new file mode 100644 index 00000000000..fc08f785906 --- /dev/null +++ b/config/data_dump.json @@ -0,0 +1,15 @@ +{ + "DumpSettings": { + "net_name": "ResNet50", + "mode": 1, + "iteration": 0, + "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"] + }, + + "DumpSettingsSpec": { + "net_name": "net name eg:ResNet50", + "mode": "0: dump all kernels, 1: dump kernels in kernels list", + "iteration": "specified iteration ", + "kernels": "op's full scope name which need to be dump" + } +} \ No newline at end of file diff --git a/config/op_info.config b/config/op_info.config new file mode 100644 index 00000000000..6ab9eba8754 --- /dev/null +++ b/config/op_info.config @@ -0,0 +1,383 @@ +{"op_name": "InitData", "inputs": [], "outputs": [], "attr": [{"name": "queue_name", "type": "str"}], "fusion_type": "OPAQUE", "dtype_format": [], "imply_type": "AiCPU"} +{"op_name": "DropoutGenMask", "inputs": [{"index": 0, "name": "x1", "param_type": "required"}, {"index": 1, "name": "x2", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [{"name": "Seed0", "type": "int"}, {"name": "Seed1", "type": "int"}], "fusion_type": "OPAQUE", "dtype_format": [[["int32", "NCHW"], ["float16", "NCHW"], ["uint8", "NCHW"]]], "imply_type": "AiCPU"} +{"op_name": "GetNext", "inputs": [], "outputs": [{"index": 0, "name": "y", "param_type": "dynamic"}], "attr": [{"name": "shared_name", "type": 
"str"}], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"]], [["int8", "DefaultFormat"]], [["int16", "DefaultFormat"]], [["int32", "DefaultFormat"]], [["int64", "DefaultFormat"]], [["float16", "DefaultFormat"]], [["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"]], [["float32", "DefaultFormat"]]], "imply_type": "AiCPU"} +{"op_name": "Print", "inputs": [{"index": 0, "name": "x", "param_type": "dynamic"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "AiCPU"} +{"op_name": "TopK", "inputs": [{"index": 0, "name": "intput", "param_type": "required"}, {"index": 1, "name": "k", "param_type": "required"}], "outputs": [{"index": 0, "name": "values", "param_type": "required"}, {"index": 1, "name": "indices", "param_type": "required"}], "attr": [{"name": "sorted", "type": "bool"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", 
"DefaultFormat"]]], "imply_type": "AiCPU"} +{"op_name": "IsFinite", "inputs": [{"index": 0, "name": "x", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int8", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int16", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int32", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int64", "DefaultFormat"], ["bool", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["bool", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["bool", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["bool", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["bool", "DefaultFormat"]], [["float16", "DefaultFormat"], ["bool", "DefaultFormat"]], [["float32", "DefaultFormat"], ["bool", "DefaultFormat"]], [["float64", "DefaultFormat"], ["bool", "DefaultFormat"]], [["bool", "NCHW"], ["bool", "NCHW"]], [["int8", "NCHW"], ["bool", "NCHW"]], [["int16", "NCHW"], ["bool", "NCHW"]], [["int32", "NCHW"], ["bool", "NCHW"]], [["int64", "NCHW"], ["bool", "NCHW"]], [["uint8", "NCHW"], ["bool", "NCHW"]], [["uint16", "NCHW"], ["bool", "NCHW"]], [["uint32", "NCHW"], ["bool", "NCHW"]], [["uint64", "NCHW"], ["bool", "NCHW"]], [["float16", "NCHW"], ["bool", "NCHW"]], [["float32", "NCHW"], ["bool", "NCHW"]], [["float64", "NCHW"], ["bool", "NCHW"]]], "imply_type": "AiCPU"} +{"op_name": "Reshape", "inputs": [{"index": 0, "name": "x", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], 
[["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float64", "DefaultFormat"], ["float64", "DefaultFormat"]], [["bool", "NCHW"], ["bool", "NCHW"]], [["int8", "NCHW"], ["int8", "NCHW"]], [["int16", "NCHW"], ["int16", "NCHW"]], [["int32", "NCHW"], ["int32", "NCHW"]], [["int64", "NCHW"], ["int64", "NCHW"]], [["uint8", "NCHW"], ["uint8", "NCHW"]], [["uint16", "NCHW"], ["uint16", "NCHW"]], [["uint32", "NCHW"], ["uint32", "NCHW"]], [["uint64", "NCHW"], ["uint64", "NCHW"]], [["float16", "NCHW"], ["float16", "NCHW"]], [["float32", "NCHW"], ["float32", "NCHW"]], [["float64", "NCHW"], ["float64", "NCHW"]]], "imply_type": "AiCPU"} +{"op_name": "Flatten", "inputs": [{"index": 0, "name": "x", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int8", "NCHW"], ["int8", "NCHW"]], [["int16", "NCHW"], ["int16", "NCHW"]], [["int32", "NCHW"], ["int32", "NCHW"]], [["int64", "NCHW"], ["int64", "NCHW"]], [["uint8", "NCHW"], ["uint8", "NCHW"]], [["uint16", "NCHW"], ["uint16", "NCHW"]], [["uint32", "NCHW"], ["uint32", "NCHW"]], [["uint64", "NCHW"], ["uint64", 
"NCHW"]], [["float16", "NCHW"], ["float16", "NCHW"]], [["float32", "NCHW"], ["float32", "NCHW"]]], "imply_type": "AiCPU"} +{"op_name": "Squeeze", "inputs": [{"index": 0, "name": "x", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float64", "DefaultFormat"], ["float64", "DefaultFormat"]], [["bool", "NCHW"], ["bool", "NCHW"]], [["int8", "NCHW"], ["int8", "NCHW"]], [["int16", "NCHW"], ["int16", "NCHW"]], [["int32", "NCHW"], ["int32", "NCHW"]], [["int64", "NCHW"], ["int64", "NCHW"]], [["uint8", "NCHW"], ["uint8", "NCHW"]], [["uint16", "NCHW"], ["uint16", "NCHW"]], [["uint32", "NCHW"], ["uint32", "NCHW"]], [["uint64", "NCHW"], ["uint64", "NCHW"]], [["float16", "NCHW"], ["float16", "NCHW"]], [["float32", "NCHW"], ["float32", "NCHW"]], [["float64", "NCHW"], ["float64", "NCHW"]]], "imply_type": "AiCPU"} +{"op_name": "ExpandDims", "inputs": [{"index": 0, "name": "x", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], 
[["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float64", "DefaultFormat"], ["float64", "DefaultFormat"]], [["bool", "NCHW"], ["bool", "NCHW"]], [["int8", "NCHW"], ["int8", "NCHW"]], [["int16", "NCHW"], ["int16", "NCHW"]], [["int32", "NCHW"], ["int32", "NCHW"]], [["int64", "NCHW"], ["int64", "NCHW"]], [["uint8", "NCHW"], ["uint8", "NCHW"]], [["uint16", "NCHW"], ["uint16", "NCHW"]], [["uint32", "NCHW"], ["uint32", "NCHW"]], [["uint64", "NCHW"], ["uint64", "NCHW"]], [["float16", "NCHW"], ["float16", "NCHW"]], [["float32", "NCHW"], ["float32", "NCHW"]], [["float64", "NCHW"], ["float64", "NCHW"]]], "imply_type": "AiCPU"} +{"op_name": "RandomChoiceWithMask", "inputs": [{"index": 0, "name": "x", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}, {"index": 1, "name": "mask", "param_type": "required"}], "attr": [{"name": "count", "type": "int"}, {"name": "seed", "type": "int"}, {"name": "seed2", "type": "int"}], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "NCHW"], ["int32", "NCHW"], ["bool", "NCHW"]], [["bool", "DefaultFormat"], ["int32", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "AiCPU"} +{"op_name": "Pack", "inputs": [{"index": 0, "name": "x", "param_type": "dynamic"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [{"name": "axis", "type": "int"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", 
"DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float64", "DefaultFormat"], ["float64", "DefaultFormat"]], [["bool", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "AiCPU"} +{"op_name": "Normal", "inputs": [{"index": 0, "name": "shape", "param_type": "required"}, {"index": 1, "name": "mean", "param_type": "required"}, {"index": 2, "name": "stddev", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [{"name": "seed", "type": "int"}], "fusion_type": "OPAQUE", "dtype_format": [[["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]]], "imply_type": "AiCPU"} +{"op_name": "CTCLoss", "inputs": [{"index": 0, "name": "inputs", "param_type": "required"}, {"index": 1, "name": "labels_indices", "param_type": "required"}, {"index": 2, "name": "labels_values", "param_type": "required"}, {"index": 3, "name": "sequence_length", "param_type": "required"}], "outputs": [{"index": 0, "name": "loss", "param_type": "required"}, {"index": 1, "name": "gradient", "param_type": "required"}], "attr": [{"name": "preprocess_collapse_repeated", "type": "bool"}, {"name": "ctc_merge_repeated", "type": "bool"}, {"name": "ignore_longer_outputs_than_inputs", "type": "bool"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "DefaultFormat"], ["int64", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float64", "DefaultFormat"], ["int64", "DefaultFormat"], ["int32", 
"DefaultFormat"], ["int32", "DefaultFormat"], ["float64", "DefaultFormat"], ["float64", "DefaultFormat"]], [["float32", "NCHW"], ["int64", "NCHW"], ["int32", "NCHW"], ["int32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]], [["float64", "NCHW"], ["int64", "NCHW"], ["int32", "NCHW"], ["int32", "NCHW"], ["float64", "NCHW"], ["float64", "NCHW"]]], "imply_type": "AiCPU"} +{"op_name": "ReverseSequence", "inputs": [{"index": 0, "name": "x", "param_type": "required"}, {"index": 1, "name": "seq_lengths", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [{"name": "seq_dim", "type": "int"}, {"name": "batch_dim", "type": "int"}], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["int32", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int32", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int32", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float64", "DefaultFormat"], ["int32", "DefaultFormat"], ["float64", "DefaultFormat"]], [["bool", "NCHW"], ["int32", "NCHW"], ["bool", "NCHW"]], [["int8", "NCHW"], ["int32", "NCHW"], ["int8", "NCHW"]], [["int16", "NCHW"], ["int32", "NCHW"], ["int16", "NCHW"]], [["int32", "NCHW"], ["int32", "NCHW"], ["int32", 
"NCHW"]], [["int64", "NCHW"], ["int32", "NCHW"], ["int64", "NCHW"]], [["uint8", "NCHW"], ["int32", "NCHW"], ["uint8", "NCHW"]], [["uint16", "NCHW"], ["int32", "NCHW"], ["uint16", "NCHW"]], [["uint32", "NCHW"], ["int32", "NCHW"], ["uint32", "NCHW"]], [["uint64", "NCHW"], ["int32", "NCHW"], ["uint64", "NCHW"]], [["float16", "NCHW"], ["int32", "NCHW"], ["float16", "NCHW"]], [["float32", "NCHW"], ["int32", "NCHW"], ["float32", "NCHW"]], [["float64", "NCHW"], ["int32", "NCHW"], ["float64", "NCHW"]], [["bool", "DefaultFormat"], ["int64", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int64", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int64", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int64", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["int64", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["int64", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["int64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["int64", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int64", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float64", "DefaultFormat"], ["int64", "DefaultFormat"], ["float64", "DefaultFormat"]], [["bool", "NCHW"], ["int64", "NCHW"], ["bool", "NCHW"]], [["int8", "NCHW"], ["int64", "NCHW"], ["int8", "NCHW"]], [["int16", "NCHW"], ["int64", "NCHW"], ["int16", "NCHW"]], [["int32", "NCHW"], ["int64", "NCHW"], ["int32", "NCHW"]], [["int64", "NCHW"], ["int64", "NCHW"], ["int64", "NCHW"]], [["uint8", "NCHW"], ["int64", "NCHW"], ["uint8", "NCHW"]], [["uint16", "NCHW"], ["int64", "NCHW"], ["uint16", "NCHW"]], [["uint32", "NCHW"], ["int64", "NCHW"], 
["uint32", "NCHW"]], [["uint64", "NCHW"], ["int64", "NCHW"], ["uint64", "NCHW"]], [["float16", "NCHW"], ["int64", "NCHW"], ["float16", "NCHW"]], [["float32", "NCHW"], ["int64", "NCHW"], ["float32", "NCHW"]], [["float64", "NCHW"], ["int64", "NCHW"], ["float64", "NCHW"]]], "imply_type": "AiCPU"} +{"op_name": "CropAndResize", "inputs": [{"index": 0, "name": "image", "param_type": "required"}, {"index": 1, "name": "boxes", "param_type": "required"}, {"index": 2, "name": "box_index", "param_type": "required"}, {"index": 3, "name": "crop_size", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [{"name": "method", "type": "str"}, {"name": "extrapolation_value", "type": "float"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int16", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float64", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["float32", 
"DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int8", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"]], [["int16", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"]], [["int32", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"]], [["int64", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"]], [["float16", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"]], [["float32", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"]], [["float64", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"]], [["uint8", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"]], [["uint16", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"]]], "imply_type": "AiCPU"} +{"op_name": "EndOfSequence", "inputs": [{"index": 0, "name": "x", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "AiCPU"} +{"op_name": "Abs", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "AddN", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", 
"float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "dynamic", "name": "inputs"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "TensorAdd", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "int32", "float16", "int32", "float32", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0", "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "x"}, {"index": 1, "dtype": ["float16", "int32", "float16", "int32", "float32", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0", "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "y"}], "outputs": [{"index": 0, "dtype": ["float16", "int32", "float16", "int32", "float32", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0", "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "ApplyMomentum", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [{"name": "use_nesterov", "param_type": "optional", "type": "bool"}, {"name": "gradient_scale", "param_type": "optional", "type": "float"}], "inputs": [{"index": 0, "dtype": ["float32", "float32", "float32"], "format": ["DefaultFormat", "NC1HWC0", "FracZ"], "name": 
"variable"}, {"index": 1, "dtype": ["float32", "float32", "float32"], "format": ["DefaultFormat", "NC1HWC0", "FracZ"], "name": "accumulation"}, {"index": 2, "dtype": ["float32", "float32", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat"], "name": "learning_rate"}, {"index": 3, "dtype": ["float32", "float32", "float32"], "format": ["DefaultFormat", "NC1HWC0", "FracZ"], "name": "gradient"}, {"index": 4, "dtype": ["float32", "float32", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat"], "name": "momentum"}], "outputs": [{"index": 0, "dtype": ["float32", "float32", "float32"], "format": ["DefaultFormat", "NC1HWC0", "FracZ"], "name": "output"}]} +{"op_name": "Assign", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [], "inputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ"], "name": "ref"}, {"index": 1, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ"], "name": "value"}], "outputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ"], "name": "output"}]} +{"op_name": "InplaceAssign", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [{"name": "fake_output", "param_type": "optional", "type": "bool"}], "inputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ"], "name": 
"x"}, {"index": 1, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ"], "name": "y"}, {"index": 2, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ"], "name": "z"}], "outputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ"], "name": "output"}]} +{"op_name": "AssignAdd", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "ref"}, {"index": 1, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "value"}], "outputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "BiasAddGrad", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [{"name": "data_format", "param_type": "optional", "type": "listStr"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["NHWC", "NHWC", "NC1HWC0", "NC1HWC0", "DefaultFormat", "DefaultFormat"], "name": "dout"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", 
"DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "DefaultFormat"], "name": "output"}]} +{"op_name": "BiasAdd", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [{"name": "data_format", "param_type": "optional", "type": "listStr"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["NHWC", "NHWC", "NC1HWC0", "NC1HWC0", "DefaultFormat", "DefaultFormat"], "name": "x"}, {"index": 1, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["NHWC", "NHWC", "NC1HWC0", "NC1HWC0", "DefaultFormat", "DefaultFormat"], "name": "b"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "DefaultFormat"], "name": "output"}]} +{"op_name": "Cast", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [{"name": "dst_type", "param_type": "required", "type": "str"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "bool", "bool", "float16", "float32", "int32", "int32", "bool", "float16", "float32", "bool", "bool", "float16", "float32", "bool", "bool"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float32", "float16", "int32", "float16", "int32", "int32", "float16", "float32", "float32", "float32", "float16", "int32", "float32", "float32", "float16", "int32", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": 
"ClearZero", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [{"name": "pad_mod", "param_type": "optional", "type": "string"}, {"name": "window", "param_type": "optional", "type": "int"}, {"name": "pad", "param_type": "optional", "type": "int"}, {"name": "stride", "param_type": "optional", "type": "int"}], "inputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": []} +{"op_name": "ConvBN1", "imply_type": "AutoDiff", "fusion_type": "CONVLUTION", "attr": [{"name": "x_shape", "param_type": "required", "type": "listInt"}, {"name": "w_shape", "param_type": "required", "type": "listInt"}, {"name": "pad_list", "param_type": "required", "type": "listInt"}, {"name": "stride", "param_type": "optional", "type": "int"}, {"name": "dilation", "param_type": "optional", "type": "int"}], "inputs": [{"index": 0, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["float16"], "format": ["FracZ"], "name": "w"}], "outputs": [{"index": 0, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "conv_res_16"}, {"index": 1, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "var_part"}, {"index": 2, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "mean"}]} +{"op_name": "Conv2DBackpropFilter", "imply_type": "AutoDiff", "fusion_type": "CONVLUTION", "attr": [{"name": "input_shape", "param_type": "required", "type": "listInt"}, {"name": "filter_sizes", "param_type": "required", "type": "listInt"}, {"name": "stride", "param_type": "optional", "type": "int"}, {"name": "pad_list", "param_type": "required", "type": "listInt"}, {"name": "dilation", "param_type": "optional", "type": "int"}], "inputs": [{"index": 0, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "out_backprop"}, {"index": 1, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "input"}], "outputs": [{"index": 0, "dtype": 
["float32"], "format": ["FracZ"], "name": "output"}]} +{"op_name": "Conv2DBackpropInput", "imply_type": "AutoDiff", "fusion_type": "CONVLUTION", "attr": [{"name": "input_sizes", "param_type": "required", "type": "listInt"}, {"name": "filter_shape", "param_type": "required", "type": "listInt"}, {"name": "stride", "param_type": "optional", "type": "int"}, {"name": "pad_list", "param_type": "required", "type": "listInt"}, {"name": "dilation", "param_type": "optional", "type": "int"}], "inputs": [{"index": 0, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "out_backprop"}, {"index": 1, "dtype": ["float16"], "format": ["FracZ"], "name": "filter"}], "outputs": [{"index": 0, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "output"}]} +{"op_name": "Conv2D", "imply_type": "AutoDiff", "fusion_type": "CONVLUTION", "attr": [{"name": "x_shape", "param_type": "required", "type": "listInt"}, {"name": "w_shape", "param_type": "required", "type": "listInt"}, {"name": "pad_list", "param_type": "required", "type": "listInt"}, {"name": "stride", "param_type": "optional", "type": "int"}, {"name": "dilation", "param_type": "optional", "type": "int"}], "inputs": [{"index": 0, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["float16"], "format": ["FracZ"], "name": "w"}], "outputs": [{"index": 0, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "output"}]} +{"op_name": "Div", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "y"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": 
["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "EqualCount", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [], "inputs": [{"index": 0, "dtype": ["int32"], "format": ["DefaultFormat"], "name": "x"}, {"index": 1, "dtype": ["int32"], "format": ["DefaultFormat"], "name": "y"}], "outputs": [{"index": 0, "dtype": ["int32"], "format": ["DefaultFormat"], "name": "output"}]} +{"op_name": "Exp", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "Five2Four", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "shape4d", "param_type": "required", "type": "listInt"}, {"name": "dstType", "param_type": "required", "type": "str"}, {"name": "output_format", "param_type": "required", "type": "str"}], "inputs": [{"index": 0, "dtype": ["float16", "float16", "float16", "float32", "float16", "float32"], "format": ["NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float16", "float32", "float32", "float32", "float32"], "format": ["DefaultFormat", "NHWC", "DefaultFormat", "DefaultFormat", "NHWC", "NHWC"], "name": "output"}]} +{"op_name": "Four2Five", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "data_format", "param_type": "optional", "type": "listStr"}, {"name": "dst_type", "param_type": "required", "type": "str"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float32", "float16", "float32", 
"float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NHWC", "NHWC", "NHWC"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float16", "float32", "float16", "float16", "float32"], "format": ["NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "FusedBatchNormGrad", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "data_format", "param_type": "optional", "type": "listStr"}], "inputs": [{"index": 0, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "dy"}, {"index": 1, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "x"}, {"index": 2, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "scale"}, {"index": 3, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "save_mean"}, {"index": 4, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "save_inv_variance"}], "outputs": [{"index": 0, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "dx"}, {"index": 1, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "bn_scale"}, {"index": 2, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "bn_bias"}]} +{"op_name": "FusedBatchNormInfer", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "momentum", "param_type": "optional", "type": "float"}, {"name": "epsilon", "param_type": "optional", "type": "float"}, {"name": "data_format", "param_type": "optional", "type": "listStr"}], "inputs": [{"index": 0, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "scale"}, {"index": 2, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "b"}, {"index": 3, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "mean"}, {"index": 4, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "variance"}], "outputs": [{"index": 0, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "y"}]} +{"op_name": "FusedBatchNorm", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": 
"momentum", "param_type": "optional", "type": "float"}, {"name": "epsilon", "param_type": "optional", "type": "float"}, {"name": "data_format", "param_type": "optional", "type": "listStr"}], "inputs": [{"index": 0, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "scale"}, {"index": 2, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "b"}, {"index": 3, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "mean"}, {"index": 4, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "variance"}], "outputs": [{"index": 0, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "y"}, {"index": 1, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "running_mean"}, {"index": 2, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "running_variance"}, {"index": 3, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "save_mean"}, {"index": 4, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "save_inv_variance"}]} +{"op_name": "BNGrad1", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "dy"}, {"index": 1, "dtype": ["float16", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "data"}, {"index": 2, "dtype": ["float32", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "mean"}], "outputs": [{"index": 0, "dtype": ["float32", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "output"}, {"index": 1, "dtype": ["float32", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "output"}, {"index": 2, "dtype": ["float32", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "FusedBN1", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "data"}], "outputs": [{"index": 0, "dtype": ["float32", "float32"], "format": ["NC1HWC0", "NC1HWC0"], 
"name": "output"}, {"index": 1, "dtype": ["float32", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "BNGrad2", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [{"name": "eps", "param_type": "optional", "type": "float"}, {"name": "data_shape", "param_type": "optional", "type": "listInt"}], "inputs": [{"index": 0, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "dgamma_red_hw"}, {"index": 1, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "dbeta_red_hw"}, {"index": 2, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "variance"}, {"index": 3, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "gamma"}], "outputs": [{"index": 0, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "output"}, {"index": 1, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "output"}, {"index": 2, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "output"}, {"index": 3, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "output"}, {"index": 4, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "output"}]} +{"op_name": "FusedBN2", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [{"name": "momentum", "param_type": "optional", "type": "float"}], "inputs": [{"index": 0, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "mean"}, {"index": 1, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "var_part"}, {"index": 2, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "running_mean"}, {"index": 3, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "running_var"}], "outputs": [{"index": 0, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "output"}, {"index": 1, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "output"}, {"index": 2, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "output"}]} +{"op_name": "BNGrad3", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "dy"}, 
{"index": 1, "dtype": ["float32", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "rs"}, {"index": 2, "dtype": ["float32", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "dgamma_dx"}, {"index": 3, "dtype": ["float32", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "dbeta_dx"}, {"index": 4, "dtype": ["float32", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "data_minus_mean"}], "outputs": [{"index": 0, "dtype": ["float16", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "FusedBN3", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [{"name": "eps", "param_type": "optional", "type": "float"}], "inputs": [{"index": 0, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "data"}, {"index": 1, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "mean"}, {"index": 2, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "variance"}, {"index": 3, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "gamma"}, {"index": 4, "dtype": ["float32"], "format": ["NC1HWC0"], "name": "beta"}], "outputs": [{"index": 0, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "output"}]} +{"op_name": "GatherV2", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [{"name": "axis", "param_type": "optional", "type": "int"}], "inputs": [{"index": 0, "dtype": ["int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat"], "name": "params"}, {"index": 1, "dtype": ["int32", "int32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat"], "name": "indices"}], "outputs": [{"index": 0, "dtype": ["int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat"], "name": "output"}]} +{"op_name": "Less", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float16"], "format": ["DefaultFormat", "NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["float16", "float16"], "format": ["DefaultFormat", 
"NC1HWC0"], "name": "y"}], "outputs": [{"index": 0, "dtype": ["bool", "bool"], "format": ["DefaultFormat", "NC1HWC0"], "name": "output"}]} +{"op_name": "Log", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "MatMul", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "transpose_a", "param_type": "optional", "type": "bool"}, {"name": "transpose_b", "param_type": "optional", "type": "bool"}], "inputs": [{"index": 0, "dtype": ["float16", "float32"], "format": ["DefaultFormat", "DefaultFormat"], "name": "x1"}, {"index": 1, "dtype": ["float16", "float32"], "format": ["DefaultFormat", "DefaultFormat"], "name": "x2"}], "outputs": [{"index": 0, "dtype": ["float16", "float32"], "format": ["DefaultFormat", "DefaultFormat"], "name": "output"}]} +{"op_name": "BatchMatMul", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "transpose_a", "param_type": "optional", "type": "bool"}, {"name": "transpose_b", "param_type": "optional", "type": "bool"}], "inputs": [{"index": 0, "dtype": ["float16"], "format": ["FRACTAL_NZ"], "name": "x1"}, {"index": 1, "dtype": ["float16"], "format": ["FRACTAL_NZ"], "name": "x2"}], "outputs": [{"index": 0, "dtype": ["float16"], "format": ["FRACTAL_NZ"], "name": "output"}]} +{"op_name": "MaxPoolGradWithArgmax", "imply_type": "AutoDiff", "fusion_type": "CONVLUTION", "attr": [{"name": "pad_mode", "param_type": "optional", "type": "str"}, {"name": "window", "param_type": "optional", "type": "int"}, {"name": "pad", "param_type": "optional", 
"type": "int"}, {"name": "stride", "param_type": "optional", "type": "int"}], "inputs": [{"index": 0, "dtype": ["float16", "float16"], "format": ["NC1HWC0", "NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["float16", "float32"], "format": ["DefaultFormat", "DefaultFormat"], "name": "argmax"}, {"index": 2, "dtype": ["float16", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "grad"}], "outputs": [{"index": 0, "dtype": ["float16", "float32"], "format": ["NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "MaxPoolWithArgmax", "imply_type": "AutoDiff", "fusion_type": "CONVLUTION", "attr": [{"name": "pad_mode", "param_type": "optional", "type": "str"}, {"name": "window", "param_type": "optional", "type": "int"}, {"name": "pad", "param_type": "optional", "type": "int"}, {"name": "stride", "param_type": "optional", "type": "int"}], "inputs": [{"index": 0, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16"], "format": ["NC1HWC0"], "name": "output"}, {"index": 1, "dtype": ["float16"], "format": ["DefaultFormat"], "name": "argmax"}]} +{"op_name": "Max", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [{"name": "axis", "param_type": "required", "type": "listInt"}, {"name": "keep_dims", "param_type": "required", "type": "bool"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "Maximum", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", 
"NC1HWC0", "NC1HWC0"], "param_type": "required", "name": "x"}, {"index": 1, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "param_type": "required", "name": "y"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "SimpleMeanGrad", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [{"name": "input_shape", "param_type": "required", "type": "listInt"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "HEAD"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "SimpleMean", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "Minimum", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "x"}, {"index": 1, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", 
"NC1HWC0", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "y"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "Mul", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [{"name": "x_shape", "param_type": "required", "type": "listInt"}, {"name": "y_shape", "param_type": "required", "type": "listInt"}, {"name": "data_format", "param_type": "required", "type": "listStr"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32", "float16", "float32"], "format": ["FracZ", "FracZ", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "x"}, {"index": 1, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32", "float16", "float32"], "format": ["FracZ", "FracZ", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "y"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32", "float16", "float32"], "format": ["FracZ", "FracZ", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "Neg", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32", "float16", 
"float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "OneHot", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "depth", "param_type": "required", "type": "int"}, {"name": "axis", "param_type": "required", "type": "int"}], "inputs": [{"index": 0, "dtype": ["int32", "int32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat"], "name": "indices"}, {"index": 1, "dtype": ["int32", "float32", "float16"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat"], "name": "on_value"}, {"index": 2, "dtype": ["int32", "float32", "float16"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat"], "name": "off_value"}], "outputs": [{"index": 0, "dtype": ["int32", "float32", "float16"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat"], "name": "output"}]} +{"op_name": "Pow", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "int32", "float16", "int32", "float32", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0"], "param_type": "required", "name": "x"}, {"index": 1, "dtype": ["float16", "int32", "float16", "int32", "float32", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0"], "param_type": "required", "name": "power"}], "outputs": [{"index": 0, "dtype": ["float16", "int32", "float16", "int32", "float32", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0"], "name": "output"}]} +{"op_name": "RealDiv", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", 
"FRACTAL_NZ"], "param_type": "required", "name": "x"}, {"index": 1, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "y"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "Reciprocal", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "ReduceMax", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [{"name": "axis", "param_type": "required", "type": "listInt"}, {"name": "keep_dims", "param_type": "required", "type": "bool"}], "inputs": [{"index": 0, "dtype": ["float16", "float16"], "format": ["DefaultFormat", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float16"], "format": ["DefaultFormat", "NC1HWC0"], "name": "output"}]} +{"op_name": "ReduceMean", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [{"name": "axis", "param_type": "required", "type": "listInt"}, {"name": "keep_dims", "param_type": "required", "type": "bool"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "ReduceSum", "imply_type": "AutoDiff", "fusion_type": 
"COMMREDUCE", "attr": [{"name": "axis", "param_type": "required", "type": "listInt"}, {"name": "keep_dims", "param_type": "required", "type": "bool"}, {"name": "atomic_add", "param_type": "optional", "type": "str"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "ReluGrad", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0"], "name": "y_backprop"}, {"index": 1, "dtype": ["float16", "float32", "float16"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0"], "name": "output"}]} +{"op_name": "ReLU", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0"], "name": "output"}]} +{"op_name": "Reshape", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "shape", "param_type": "required", "type": "listInt"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "tensor"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", 
"NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "Round", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "Rsqrt", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "param_type": "required", "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "Select", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["bool", "bool", "bool", "bool", "bool", "bool"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0"], "param_type": "required", "name": "condition"}, {"index": 1, "dtype": ["float16", "int32", "float16", "int32", "float32", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0"], "param_type": "required", "name": "x"}, {"index": 2, "dtype": ["float16", "int32", "float16", "int32", "float32", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0"], "param_type": "required", "name": "y"}], "outputs": [{"index": 0, "dtype": ["float16", "int32", "float16", "int32", "float32", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0"], "name": "output"}]} +{"op_name": "Softmax", 
"imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [{"name": "axis", "param_type": "required", "type": "listInt"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "SparseSoftmaxCrossEntropyWithLogits", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "is_grad", "param_type": "optional", "type": "bool"}, {"name": "sens", "param_type": "optional", "type": "float"}], "inputs": [{"index": 0, "dtype": ["float32"], "format": ["DefaultFormat"], "name": "features"}, {"index": 1, "dtype": ["int32"], "format": ["DefaultFormat"], "name": "labels"}], "outputs": [{"index": 0, "dtype": ["float32"], "format": ["DefaultFormat"], "name": "output"}]} +{"op_name": "Sqrt", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "param_type": "required", "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "StridedSlice", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "begin", "param_type": "required", "type": "listInt"}, {"name": "end", "param_type": "required", "type": "listInt"}, {"name": "strides", "param_type": "required", "type": "listInt"}, {"name": "begin_mask", "param_type": "required", "type": "int"}, {"name": "end_mask", "param_type": "required", "type": "int"}, {"name": "ellipsis_mask", "param_type": "required", "type": "int"}, {"name": 
"new_axis_mask", "param_type": "required", "type": "int"}, {"name": "shrink_axis_mask", "param_type": "required", "type": "int"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "Sub", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "x"}, {"index": 1, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "y"}], "outputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "Sum", "imply_type": "AutoDiff", "fusion_type": "COMMREDUCE", "attr": [{"name": "axis", "param_type": "required", "type": "listInt"}, {"name": "keepdims", "param_type": "required", "type": "bool"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", 
"float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "param_type": "required", "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "output"}]} +{"op_name": "Tile", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "multiples", "param_type": "required", "type": "listInt"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "int32", "float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "ZerosLike", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "Argmax", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "axis", "param_type": "optional", "type": "int"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["int32", "int32", "int32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "FloorDiv", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", 
"float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "y"}], "outputs": [{"index": 0, "dtype": ["int32", "int32", "int32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "Equal", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "y"}], "outputs": [{"index": 0, "dtype": ["bool", "bool", "bool", "bool", "bool", "bool"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "GreaterEqual", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "y"}], "outputs": [{"index": 0, "dtype": ["bool", "bool", "bool", "bool", "bool", "bool"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "LessEqual", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32"], "format": 
["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["int32", "float16", "float32", "int32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "y"}], "outputs": [{"index": 0, "dtype": ["bool", "bool", "bool", "bool", "bool", "bool"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0"], "name": "output"}]} +{"op_name": "ExpandDims", "imply_type": "AutoDiff", "fusion_type": "OPAQUE", "attr": [{"name": "axis", "param_type": "required", "type": "int"}], "inputs": [{"index": 0, "dtype": ["float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "int32"], "format": ["DefaultFormat", "DefaultFormat", "DefaultFormat"], "name": "y"}]} +{"op_name": "Greater", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float16", "float32", "float32"], "format": ["DefaultFormat", "NC1HWC0", "DefaultFormat", "NC1HWC0"], "name": "x"}, {"index": 1, "dtype": ["float16", "float16", "float32", "float32"], "format": ["DefaultFormat", "NC1HWC0", "DefaultFormat", "NC1HWC0"], "name": "y"}], "outputs": [{"index": 0, "dtype": ["bool", "bool", "bool", "bool"], "format": ["DefaultFormat", "NC1HWC0", "DefaultFormat", "NC1HWC0"], "name": "output"}]} +{"op_name": "EquivFormat", "imply_type": "AutoDiff", "fusion_type": "ELEMWISE", "attr": [], "inputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["DefaultFormat", "DefaultFormat", "FRACTAL_NZ", "FRACTAL_NZ"], "name": "x"}], "outputs": [{"index": 0, "dtype": ["float16", "float32", "float16", "float32"], "format": ["FRACTAL_NZ", "FRACTAL_NZ", "DefaultFormat", "DefaultFormat"], "name": "output"}]} +{"op_name": "Cast", "inputs": [{"index": 0, "name": "x"}], "outputs": 
[{"index": 0, "name": "output"}], "attr": [{"name": "dst_type", "param_type": "required", "type": "str"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["bool", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "Equal", "inputs": [{"index": 0, "name": "x"}, {"index": 1, "name": "y"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["bool", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "SimpleMean", "inputs": [{"index": 0, "name": "x"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "SimpleMeanGrad", "inputs": [{"index": 0, "name": "HEAD"}], "outputs": [{"index": 0, "name": "output"}], "attr": [{"name": "input_shape", "param_type": "required", "type": "listInt"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "Mul", "inputs": [{"index": 0, "name": "x"}, {"index": 1, "name": "y"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "AutoDiff", 
"processor": "cuda"} +{"op_name": "ReLU6", "inputs": [{"index": 0, "name": "x"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "ReLU6Grad", "inputs": [{"index": 0, "name": "y_grad"}, {"index": 1, "name": "x"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "Squeeze", "inputs": [{"index": 0, "name": "x"}], "outputs": [{"index": 0, "name": "output"}], "attr": [{"name": "axis", "param_type": "optional", "type": "listInt"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "SqueezeGrad", "inputs": [{"index": 0, "name": "y_grad"}], "outputs": [{"index": 0, "name": "output"}], "attr": [{"name": "x_shape", "param_type": "required", "type": "listInt"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "Tile", "inputs": [{"index": 0, "name": "x"}], "outputs": [{"index": 0, "name": "output"}], "attr": [{"name": "multiples", "param_type": "required", "type": "listInt"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": 
"HSigmoid", "inputs": [{"index": 0, "name": "x"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "HSigmoidGrad", "inputs": [{"index": 0, "name": "y_grad"}, {"index": 1, "name": "x"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "HSwish", "inputs": [{"index": 0, "name": "x"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "HSwishGrad", "inputs": [{"index": 0, "name": "y_grad"}, {"index": 1, "name": "x"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "Sub", "inputs": [{"index": 0, "name": "x"}, {"index": 1, "name": "y"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "AutoDiff", 
"processor": "cuda"} +{"op_name": "LogicalAnd", "inputs": [{"index": 0, "name": "x"}, {"index": 1, "name": "y"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "LogicalNot", "inputs": [{"index": 0, "name": "x"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "LogicalOr", "inputs": [{"index": 0, "name": "x"}, {"index": 1, "name": "y"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "LessEqual", "inputs": [{"index": 0, "name": "x"}, {"index": 1, "name": "y"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["bool", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "NotEqual", "inputs": [{"index": 0, "name": "x"}, {"index": 1, "name": "y"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["bool", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "GreaterEqual", "inputs": [{"index": 0, "name": 
"x"}, {"index": 1, "name": "y"}], "outputs": [{"index": 0, "name": "output"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["bool", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "AutoDiff", "processor": "cuda"} +{"op_name": "Abs", "inputs": [{"index": 0, "name": "x", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]], [["int32", ""], ["int32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "abs.so", "compute_cost": 10, "kernel_name": "abs", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "InplaceAdd", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "v", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "indices", "param_type": "required", "type": "listInt", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "inplace_add_d.so", "compute_cost": 10, "kernel_name": "inplace_add_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "InplaceSub", "inputs": 
[{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "v", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "indices", "param_type": "required", "type": "listInt", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "inplace_sub_d.so", "compute_cost": 10, "kernel_name": "inplace_sub_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "AbsGrad", "inputs": [{"index": 0, "name": "y", "param_type": "required"}, {"index": 1, "name": "dy", "param_type": "required"}], "outputs": [{"index": 0, "name": "z", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "abs_grad.so", "compute_cost": 10, "kernel_name": "abs_grad", "partial_flag": true, 
"reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ACos", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "acos.so", "compute_cost": 10, "kernel_name": "acos", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "ACosGrad", "inputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "acos_grad.so", "compute_cost": 10, "kernel_name": "acos_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Acosh", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 
0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "acosh.so", "compute_cost": 10, "kernel_name": "acosh", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "AcoshGrad", "inputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "acosh_grad.so", "compute_cost": 10, "kernel_name": "acosh_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "AdamApplyOneWithDecay", "inputs": [{"index": 0, "name": "input0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "input1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "input2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, 
"name": "input3", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "input4", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "mul0_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "mul1_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 7, "name": "mul2_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 8, "name": "mul3_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 9, "name": "mul4_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 10, "name": "add2_y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "output1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "output2", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", 
"async_flag": false, "binfile_name": "adam_apply_one_with_decay.so", "compute_cost": 10, "kernel_name": "adam_apply_one_with_decay", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Add", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["", ""], ["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "add.so", "compute_cost": 10, "kernel_name": "add", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "ApplyCenteredRMSProp", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "mg", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "ms", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "mom", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "rho", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "momentum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 7, "name": "epsilon", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 8, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], 
["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", 
"DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_centered_rms_prop.so", "compute_cost": 10, "kernel_name": "apply_centered_rms_prop", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "AddN", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "dynamic", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "n", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]], [["int32", ""], ["int32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "add_n.so", "compute_cost": 10, "kernel_name": "add_n", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "AccumulateNV2", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "dynamic", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "n", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]], [["int32", ""], ["int32", ""]], [["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "accumulate_n_v2.so", "compute_cost": 10, "kernel_name": "accumulate_n_v2", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "ApplyFtrl", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, 
{"index": 2, "name": "linear", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "l1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "l2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 7, "name": "lr_power", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "linear", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", 
"DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_ftrl.so", "compute_cost": 10, "kernel_name": "apply_ftrl", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApplyMomentum", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "momentum", "need_compile": false, "param_type": 
"required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_nesterov", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_momentum.so", 
"compute_cost": 10, "kernel_name": "apply_momentum", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Adam", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "m", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "v", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "beta1_power", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "beta2_power", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "beta1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 7, "name": "beta2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 8, "name": "epsilon", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 9, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "m", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "v", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}, {"name": "use_nesterov", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", 
"DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", 
"DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_adam.so", "compute_cost": 10, "kernel_name": "apply_adam", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApplyAdaMax", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "m", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "v", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "beta1_power", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "beta1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "beta2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 7, "name": "epsilon", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 8, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "m", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "v", "need_compile": false, "param_type": "required", "shape": "all"}], 
"attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", 
"FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_ada_max_d.so", "compute_cost": 10, "kernel_name": "apply_ada_max_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApplyAdadelta", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "accum_update", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "rho", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "epsilon", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "accum_update", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", 
"dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], 
["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_adadelta_d.so", "compute_cost": 10, "kernel_name": "apply_adadelta_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApplyAdagrad", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "update_slots", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", 
"FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_adagrad_d.so", "compute_cost": 10, "kernel_name": "apply_adagrad_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApplyAdagradV2", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "epsilon", "param_type": "required", "type": "float", "value": "all"}, {"name": "update_slots", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], 
["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_adagradv2_d.so", "compute_cost": 10, "kernel_name": "apply_adagradv2_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApplyAddSign", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "m", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "alpha", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "sign_decay", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "beta", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "m", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], 
"fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "FracZ"], 
["float32", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_add_sign_d.so", "compute_cost": 10, "kernel_name": "apply_add_sign_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApplyPowerSign", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "m", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "logbase", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "sign_decay", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "beta", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "m", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], 
[["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_power_sign_d.so", "compute_cost": 10, "kernel_name": "apply_power_sign_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApplyGradientDescent", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "alpha", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "delta", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", 
"DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_gradient_descent.so", "compute_cost": 10, "kernel_name": "apply_gradient_descent", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApplyProximalGradientDescent", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "alpha", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "l1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "l2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "delta", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", 
"DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_proximal_gradient_descent.so", "compute_cost": 10, "kernel_name": "apply_proximal_gradient_descent", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SparseApplyFtrlV2", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "linear", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": 
"all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "linear", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "lr", "param_type": "required", "type": "float", "value": "all"}, {"name": "l1", "param_type": "required", "type": "float", "value": "all"}, {"name": "l2", "param_type": "required", "type": "float", "value": "all"}, {"name": "l2_shrinkage", "param_type": "required", "type": "float", "value": "all"}, {"name": "lr_power", "param_type": "required", "type": "float", "value": "all"}, {"name": "use_locking", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["int32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]], [["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sparse_apply_ftrl_v2_d.so", "compute_cost": 10, "kernel_name": "sparse_apply_ftrl_v2_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SparseApplyAdagradV2", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "indices", "need_compile": false, "param_type": "required", "shape": 
"all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "lr", "param_type": "required", "type": "float", "value": "all"}, {"name": "epsilon", "param_type": "required", "type": "float", "value": "all"}, {"name": "use_locking", "param_type": "optional", "type": "bool", "value": "all"}, {"name": "update_slots", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["int32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]], [["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sparse_apply_adagrad_v2_d.so", "compute_cost": 10, "kernel_name": "sparse_apply_adagrad_v2_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApproximateEqual", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "tolerance", "param_type": "optional", "type": "float", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""], ["bool", ""]], [["float32", ""], ["float32", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "approximate_equal.so", "compute_cost": 10, "kernel_name": "approximate_equal", "partial_flag": true, 
"reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "AdamApplyOne", "inputs": [{"index": 0, "name": "input0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "input1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "input2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "input3", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "input4", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "mul0_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "mul1_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 7, "name": "mul2_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 8, "name": "mul3_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 9, "name": "add2_y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "output1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "output2", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", 
"DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "adam_apply_one.so", "compute_cost": 10, "kernel_name": "adam_apply_one", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Assign", "inputs": [{"index": 0, "name": "ref", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "value", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "ref", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["bool", "NC1HWC0"], ["bool", "NC1HWC0"], ["bool", "NC1HWC0"]], [["bool", "C1HWNCoC0"], ["bool", "C1HWNCoC0"], ["bool", "C1HWNCoC0"]], [["bool", "FracZ"], ["bool", "FracZ"], ["bool", "FracZ"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"], ["int8", "NC1HWC0"]], [["int8", "C1HWNCoC0"], ["int8", "C1HWNCoC0"], ["int8", "C1HWNCoC0"]], [["int8", "FracZ"], ["int8", "FracZ"], ["int8", "FracZ"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint8", "C1HWNCoC0"], ["uint8", "C1HWNCoC0"], ["uint8", "C1HWNCoC0"]], [["uint8", "FracZ"], ["uint8", "FracZ"], ["uint8", "FracZ"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int16", "NC1HWC0"], ["int16", "NC1HWC0"], ["int16", "NC1HWC0"]], [["int16", "C1HWNCoC0"], ["int16", "C1HWNCoC0"], ["int16", "C1HWNCoC0"]], [["int16", "FracZ"], ["int16", "FracZ"], ["int16", "FracZ"]], [["uint16", "DefaultFormat"], 
["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint16", "NC1HWC0"], ["uint16", "NC1HWC0"], ["uint16", "NC1HWC0"]], [["uint16", "C1HWNCoC0"], ["uint16", "C1HWNCoC0"], ["uint16", "C1HWNCoC0"]], [["uint16", "FracZ"], ["uint16", "FracZ"], ["uint16", "FracZ"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "C1HWNCoC0"], ["int32", "C1HWNCoC0"], ["int32", "C1HWNCoC0"]], [["int32", "FracZ"], ["int32", "FracZ"], ["int32", "FracZ"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint32", "NC1HWC0"], ["uint32", "NC1HWC0"], ["uint32", "NC1HWC0"]], [["uint32", "C1HWNCoC0"], ["uint32", "C1HWNCoC0"], ["uint32", "C1HWNCoC0"]], [["uint32", "FracZ"], ["uint32", "FracZ"], ["uint32", "FracZ"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["int64", "NC1HWC0"], ["int64", "NC1HWC0"], ["int64", "NC1HWC0"]], [["int64", "C1HWNCoC0"], ["int64", "C1HWNCoC0"], ["int64", "C1HWNCoC0"]], [["int64", "FracZ"], ["int64", "FracZ"], ["int64", "FracZ"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["uint64", "NC1HWC0"], ["uint64", "NC1HWC0"], ["uint64", "NC1HWC0"]], [["uint64", "C1HWNCoC0"], ["uint64", "C1HWNCoC0"], ["uint64", "C1HWNCoC0"]], [["uint64", "FracZ"], ["uint64", "FracZ"], ["uint64", "FracZ"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], 
["float32", "C1HWNCoC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "assign.so", "compute_cost": 10, "kernel_name": "assign", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "AssignAdd", "inputs": [{"index": 0, "name": "ref", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "value", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "ref", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"], ["int8", "NC1HWC0"]], [["int8", "C1HWNCoC0"], ["int8", "C1HWNCoC0"], ["int8", "C1HWNCoC0"]], [["int8", "FracZ"], ["int8", "FracZ"], ["int8", "FracZ"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint8", "C1HWNCoC0"], ["uint8", "C1HWNCoC0"], ["uint8", "C1HWNCoC0"]], [["uint8", "FracZ"], ["uint8", "FracZ"], ["uint8", "FracZ"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "C1HWNCoC0"], ["int32", "C1HWNCoC0"], ["int32", "C1HWNCoC0"]], [["int32", "FracZ"], ["int32", "FracZ"], ["int32", "FracZ"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["int64", "NC1HWC0"], ["int64", "NC1HWC0"], ["int64", "NC1HWC0"]], [["int64", "C1HWNCoC0"], ["int64", "C1HWNCoC0"], ["int64", "C1HWNCoC0"]], [["int64", "FracZ"], ["int64", "FracZ"], ["int64", "FracZ"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], 
[["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "assignadd.so", "compute_cost": 10, "kernel_name": "assignadd", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "AssignSub", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "value", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"], ["int8", "NC1HWC0"]], [["int8", "C1HWNCoC0"], ["int8", "C1HWNCoC0"], ["int8", "C1HWNCoC0"]], [["int8", "FracZ"], ["int8", "FracZ"], ["int8", "FracZ"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint8", "C1HWNCoC0"], ["uint8", "C1HWNCoC0"], ["uint8", "C1HWNCoC0"]], [["uint8", "FracZ"], ["uint8", "FracZ"], ["uint8", "FracZ"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "C1HWNCoC0"], ["int32", "C1HWNCoC0"], ["int32", "C1HWNCoC0"]], [["int32", "FracZ"], ["int32", "FracZ"], ["int32", "FracZ"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", 
"DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "assign_sub.so", "compute_cost": 10, "kernel_name": "assign_sub", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BatchMatMul", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "bias", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "transpose_x1", "param_type": "required", "type": "bool", "value": "all"}, {"name": "transpose_x2", "param_type": "required", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int32", ""], ["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "batch_matmul.so", "compute_cost": 10, "kernel_name": "batch_matmul", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "BatchNorm", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "scale", 
"need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "offset", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "mean", "need_compile": false, "param_type": "optional", "shape": "all"}, {"index": 4, "name": "variance", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "batch_mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "batch_variance", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "reserve_space_1", "need_compile": false, "param_type": "optional", "shape": "all"}, {"index": 4, "name": "reserve_space_2", "need_compile": false, "param_type": "optional", "shape": "all"}], "attr": [{"name": "epsilon", "param_type": "optional", "type": "float", "value": "all"}, {"name": "data_format", "param_type": "optional", "type": "str", "value": "all"}, {"name": "is_training", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], 
["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "batch_norm.so", "compute_cost": 10, "kernel_name": "batch_norm", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BatchNormGrad", "inputs": [{"index": 0, "name": "y_backprop", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "scale", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "reserve_space_1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "reserve_space_2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "x_backprop", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "scale_backprop", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "offset_backprop", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "reserve_space_4", "need_compile": false, "param_type": "optional", "shape": "all"}, {"index": 4, "name": "reserve_space_5", "need_compile": false, "param_type": "optional", "shape": "all"}], "attr": [{"name": "epsilon", "param_type": "optional", "type": "float", "value": "all"}, {"name": "data_format", "param_type": "optional", "type": "str", "value": "all"}, {"name": "is_training", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", 
"DefaultFormat"], ["float16", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "batchnormgrad.so", "compute_cost": 10, "kernel_name": "batchnormgrad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BiasAdd", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "bias", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "data_format", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "COMMREDUCE", "dtype_format": [[["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bias_add.so", "compute_cost": 10, "kernel_name": "bias_add", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": 
"dynamicFormat"} +{"op_name": "BiasAddGrad", "inputs": [{"index": 0, "name": "output_backprop", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "data_format", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "COMMREDUCE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "FRACTAL_NZ"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FRACTAL_NZ"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "biasaddgrad.so", "compute_cost": 10, "kernel_name": "biasaddgrad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Cast", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "dst_type", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["bool", ""], ["float16", ""]], [["bool", ""], ["uint8", ""]], [["bool", ""], ["float32", ""]], [["bool", ""], ["int32", ""]], [["int8", ""], ["float16", ""]], [["int8", ""], ["float32", ""]], [["int8", ""], ["int32", ""]], [["uint8", ""], ["float16", ""]], [["uint8", ""], ["float32", ""]], [["uint8", ""], ["int32", ""]], [["int32", ""], ["bool", ""]], [["int32", ""], ["float16", ""]], [["int32", ""], ["float32", ""]], [["int32", ""], ["int8", ""]], [["int32", ""], ["uint8", ""]], [["float16", ""], ["uint8", ""]], [["float16", ""], ["float32", ""]], [["float16", ""], ["int32", ""]], [["float32", ""], ["float16", ""]], [["float32", ""], ["int32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "cast.so", "compute_cost": 10, "kernel_name": "cast", 
"partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Conv2D", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "filter", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "bias", "need_compile": false, "param_type": "optional", "shape": "all"}, {"index": 3, "name": "offset_w", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [{"name": "stride", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pad_list", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "dilation", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "offset_a", "param_type": "optional", "type": "int", "value": "all"}], "fusion_type": "CONVLUTION", "dtype_format": [[["float16", ""], ["float16", ""], ["float16", ""], ["int8", ""], ["float16", ""]], [["int8", ""], ["int8", ""], ["int32", ""], ["int8", ""], ["int32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "conv2d.so", "compute_cost": 10, "kernel_name": "conv2d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "Conv2DBackpropFilter", "inputs": [{"index": 0, "name": "out_backprop", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "filter_sizes", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "stride", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pad_list", "param_type": "required", "type": "listInt", "value": "all"}, {"name": 
"dilation", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "groups", "param_type": "optional", "type": "int", "value": "all"}, {"name": "data_format", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "CONVLUTION", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "conv2d_backprop_filter_d.so", "compute_cost": 10, "kernel_name": "conv2d_backprop_filter_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Conv2DBackpropInput", "inputs": [{"index": 0, "name": "out_backprop", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "filter", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [{"name": "input_sizes", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "stride", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pad_list", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "dilation", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "group", "param_type": "optional", "type": "int", "value": "all"}, {"name": "data_format", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "CONVLUTION", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "FracZ"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "conv2d_backprop_input_d.so", "compute_cost": 10, "kernel_name": "conv2d_backprop_input_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ConfusionMulGrad", "inputs": [{"index": 0, "name": "input0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "input1", "need_compile": false, 
"param_type": "required", "shape": "all"}, {"index": 2, "name": "input2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "output1", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "required", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "", "compute_cost": 10, "kernel_name": "", "partial_flag": false, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "DropoutDoMask", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "mask", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "keep_prob", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""], ["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "drop_out_do_mask.so", "compute_cost": 10, "kernel_name": "drop_out_do_mask", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "Gelu", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": 
false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "gelu.so", "compute_cost": 10, "kernel_name": "gelu", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "GeluGrad", "inputs": [{"index": 0, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "gelu_grad.so", "compute_cost": 10, "kernel_name": "gelu_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "MaxPool", "inputs": [{"index": 0, "name": "input_data", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output_data", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": 
[{"name": "ksize", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "padding", "param_type": "required", "type": "str", "value": "all"}, {"name": "data_format", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool.so", "compute_cost": 10, "kernel_name": "max_pool", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "MaxPoolGrad", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "ksize", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "padding", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool_grad.so", "compute_cost": 10, "kernel_name": "max_pool_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "MaxPoolGradWithArgmax", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "argmax", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": 
[{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "ksize", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "padding", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["uint16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["int64", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool_grad_with_argmax.so", "compute_cost": 10, "kernel_name": "max_pool_grad_with_argmax", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "MaxPoolWithArgmax", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "argmax", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "ksize", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "padding", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "CONVLUTION", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["uint16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool_with_argmax.so", "compute_cost": 10, "kernel_name": "max_pool_with_argmax", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Mul", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, 
"name": "output", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["", ""], ["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "mul.so", "compute_cost": 10, "kernel_name": "mul", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "RealDiv", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "realdiv.so", "compute_cost": 10, "kernel_name": "realdiv", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "ReLU", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int8", ""], ["int8", ""]], [["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "relu.so", "compute_cost": 10, "kernel_name": "relu", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "ReluGrad", "inputs": [{"index": 0, "name": "gradients", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "features", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "backprops", 
"need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int8", ""], ["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""], ["uint8", ""]], [["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "relugrad.so", "compute_cost": 10, "kernel_name": "relugrad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "ReLU6", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]], [["int32", ""], ["int32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "relu6.so", "compute_cost": 10, "kernel_name": "relu6", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "ReLU6Grad", "inputs": [{"index": 0, "name": "gradients", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "features", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "backprops", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], 
[["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "relu6_grad.so", "compute_cost": 10, "kernel_name": "relu6_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ReLUV2", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "mask", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["uint8", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["uint8", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"], ["uint8", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"], ["uint8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "relu_v2.so", "compute_cost": 10, "kernel_name": "relu_v2", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ReluGradV2", "inputs": [{"index": 0, "name": "gradients", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "mask", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "backprops", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["uint8", "DefaultFormat"], ["float16", "NC1HWC0"]], [["float32", "NC1HWC0"], ["uint8", "DefaultFormat"], ["float32", "NC1HWC0"]], [["int32", 
"NC1HWC0"], ["uint8", "DefaultFormat"], ["int32", "NC1HWC0"]], [["int8", "NC1HWC0"], ["uint8", "DefaultFormat"], ["int8", "NC1HWC0"]], [["uint8", "NC1HWC0"], ["uint8", "DefaultFormat"], ["uint8", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "relu_grad_v2.so", "compute_cost": 10, "kernel_name": "relu_grad_v2", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SoftmaxCrossEntropyWithLogits", "inputs": [{"index": 0, "name": "input_features", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "input_labels", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output_loss", "need_compile": true, "param_type": "required", "shape": "all"}, {"index": 1, "name": "output_backprop", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "softmax_cross_entropy_with_logits.so", "compute_cost": 10, "kernel_name": "softmax_cross_entropy_with_logits", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SigmoidCrossEntropyWithLogits", "inputs": [{"index": 0, "name": "predict", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "target", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "loss", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], 
["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sigmoid_cross_entropy_with_logits.so", "compute_cost": 10, "kernel_name": "sigmoid_cross_entropy_with_logits", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SigmoidCrossEntropyWithLogitsGrad", "inputs": [{"index": 0, "name": "predict", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "target", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "dout", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "gradient", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sigmoid_cross_entropy_with_logits_grad.so", "compute_cost": 10, "kernel_name": "sigmoid_cross_entropy_with_logits_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "TensorAdd", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": 
"required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "add.so", "compute_cost": 10, "kernel_name": "add", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "TransData", "inputs": [{"index": 0, "name": "src", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "dst", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "src_format", "param_type": "required", "type": "str", "value": "DefaultFormat, NC1HWC0, FracZ, FRACTAL_NZ, HWCN, C1HWNCoC0, NDHWC, NHWC"}, {"name": "dst_format", "param_type": "required", "type": "str", "value": "DefaultFormat, NC1HWC0, FracZ, FRACTAL_NZ, HWCN, C1HWNCoC0, NDHWC, NHWC"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "NHWC"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NHWC"]], [["float32", "NC1HWC0"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "FracZ"]], [["float32", "HWCN"], ["float32", "FracZ"]], [["float32", "FracZ"], ["float32", "HWCN"]], [["float32", "C1HWNCoC0"], ["float32", "HWCN"]], [["float32", "HWCN"], ["float32", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "FracZ"]], [["float16", "NHWC"], ["float16", "FracZ"]], [["float16", "HWCN"], 
["float16", "FracZ"]], [["float16", "DefaultFormat"], ["float16", "NC1HWC0"]], [["float16", "NHWC"], ["float16", "NC1HWC0"]], [["float16", "HWCN"], ["float16", "NC1HWC0"]], [["float16", "NC1HWC0"], ["float16", "NHWC"]], [["float16", "NC1HWC0"], ["float16", "DefaultFormat"]], [["float16", "FracZ"], ["float16", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "FracZ"]], [["float16", "HWCN"], ["float16", "FracZ"]], [["float16", "FracZ"], ["float16", "HWCN"]], [["float16", "C1HWNCoC0"], ["float16", "HWCN"]], [["float16", "HWCN"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "FRACTAL_NZ"]], [["float32", "DefaultFormat"], ["float32", "FRACTAL_NZ"]], [["float16", "FRACTAL_NZ"], ["float16", "DefaultFormat"]], [["float32", "FRACTAL_NZ"], ["float32", "DefaultFormat"]], [["bool", "NHWC"], ["bool", "NC1HWC0"]], [["bool", "DefaultFormat"], ["bool", "NC1HWC0"]], [["bool", "NC1HWC0"], ["bool", "NHWC"]], [["bool", "NC1HWC0"], ["bool", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "NHWC"]], [["float16", "DefaultFormat"], ["float16", "HWCN"]], [["float16", "NHWC"], ["float16", "DefaultFormat"]], [["float16", "NHWC"], ["float16", "HWCN"]], [["float16", "HWCN"], ["float16", "DefaultFormat"]], [["float16", "HWCN"], ["float16", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "HWCN"]], [["float32", "NHWC"], ["float32", "DefaultFormat"]], [["float32", "NHWC"], ["float32", "HWCN"]], [["float32", "HWCN"], ["float32", "DefaultFormat"]], [["float32", "HWCN"], ["float32", "NHWC"]], [["int8", "DefaultFormat"], ["int8", "FRACTAL_NZ"]], [["int8", "DefaultFormat"], ["int8", "FracZ"]], [["int8", "DefaultFormat"], ["int8", "NHWC"]], [["int8", "DefaultFormat"], ["int8", "HWCN"]], [["int8", "NHWC"], ["int8", "DefaultFormat"]], [["int8", "NHWC"], ["int8", "HWCN"]], [["int8", "HWCN"], ["int8", "DefaultFormat"]], [["int8", "HWCN"], ["int8", "NHWC"]], [["int16", "DefaultFormat"], ["int16", 
"NHWC"]], [["int16", "DefaultFormat"], ["int16", "HWCN"]], [["int16", "NHWC"], ["int16", "DefaultFormat"]], [["int16", "NHWC"], ["int16", "HWCN"]], [["int16", "HWCN"], ["int16", "DefaultFormat"]], [["int16", "HWCN"], ["int16", "NHWC"]], [["int32", "DefaultFormat"], ["int32", "NHWC"]], [["int32", "DefaultFormat"], ["int32", "HWCN"]], [["int32", "NHWC"], ["int32", "DefaultFormat"]], [["int32", "NHWC"], ["int32", "HWCN"]], [["int32", "HWCN"], ["int32", "DefaultFormat"]], [["int32", "HWCN"], ["int32", "NHWC"]], [["int64", "DefaultFormat"], ["int64", "NHWC"]], [["int64", "DefaultFormat"], ["int64", "HWCN"]], [["int64", "NHWC"], ["int64", "DefaultFormat"]], [["int64", "NHWC"], ["int64", "HWCN"]], [["int64", "HWCN"], ["int64", "DefaultFormat"]], [["int64", "HWCN"], ["int64", "NHWC"]], [["uint8", "DefaultFormat"], ["uint8", "NHWC"]], [["uint8", "DefaultFormat"], ["uint8", "HWCN"]], [["uint8", "NHWC"], ["uint8", "DefaultFormat"]], [["uint8", "NHWC"], ["uint8", "HWCN"]], [["uint8", "HWCN"], ["uint8", "DefaultFormat"]], [["uint8", "HWCN"], ["uint8", "NHWC"]], [["uint16", "DefaultFormat"], ["uint16", "NHWC"]], [["uint16", "DefaultFormat"], ["uint16", "HWCN"]], [["uint16", "NHWC"], ["uint16", "DefaultFormat"]], [["uint16", "NHWC"], ["uint16", "HWCN"]], [["uint16", "HWCN"], ["uint16", "DefaultFormat"]], [["uint16", "HWCN"], ["uint16", "NHWC"]], [["uint32", "DefaultFormat"], ["uint32", "NHWC"]], [["uint32", "DefaultFormat"], ["uint32", "HWCN"]], [["uint32", "NHWC"], ["uint32", "DefaultFormat"]], [["uint32", "NHWC"], ["uint32", "HWCN"]], [["uint32", "HWCN"], ["uint32", "DefaultFormat"]], [["uint32", "HWCN"], ["uint32", "NHWC"]], [["uint64", "DefaultFormat"], ["uint64", "NHWC"]], [["uint64", "DefaultFormat"], ["uint64", "HWCN"]], [["uint64", "NHWC"], ["uint64", "DefaultFormat"]], [["uint64", "NHWC"], ["uint64", "HWCN"]], [["uint64", "HWCN"], ["uint64", "DefaultFormat"]], [["uint64", "HWCN"], ["uint64", "NHWC"]], [["int32", "FRACTAL_NZ"], ["int32", "DefaultFormat"]], [["float16", 
"NDHWC"], ["float16", "NC1HWC0"]], [["float16", "NC1HWC0"], ["float16", "NDHWC"]], [["int8", "HWCN"], ["int8", "C1HWNCoC0"]], [["float16", "HWCN"], ["float16", "FracZ"]], [["float16", "FracZ"], ["float16", "HWCN"]], [["float16", "HWCN"], ["float16", "FRACTAL_NZ"]], [["float32", "HWCN"], ["float16", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "trans_data.so", "compute_cost": 10, "kernel_name": "trans_data", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "TopK", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "assist_seq", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "values", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "dim", "param_type": "optional", "type": "int", "value": "all"}, {"name": "k", "param_type": "required", "type": "int", "value": "all"}, {"name": "largest", "param_type": "optional", "type": "bool", "value": "all"}, {"name": "sorted", "param_type": "optional", "type": "bool", "value": "true"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "top_k_d.so", "compute_cost": 10, "kernel_name": "top_k_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "MatMul", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "bias", "need_compile": false, "param_type": "optional", "shape": "all"}, {"index": 3, "name": "offset_w", 
"need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "transpose_x1", "param_type": "required", "type": "bool", "value": "all"}, {"name": "transpose_x2", "param_type": "required", "type": "bool", "value": "all"}, {"name": "offset_x", "param_type": "optional", "type": "int", "value": "all"}], "fusion_type": "DYNAMIC", "dtype_format": [[["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "DefaultFormat"], ["int8", "DefaultFormat"], ["float16", "FRACTAL_NZ"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float32", "DefaultFormat"], ["int8", "DefaultFormat"], ["float32", "FRACTAL_NZ"]], [["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["int8", "DefaultFormat"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int8", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "NHWC"], ["int32", "NHWC"], ["int32", "NHWC"], ["int8", "DefaultFormat"], ["int32", "NHWC"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "matmul.so", "compute_cost": 10, "kernel_name": "matmul", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Sub", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], 
"imply_type": "TBE", "async_flag": false, "binfile_name": "sub.so", "compute_cost": 10, "kernel_name": "sub", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "ReduceMeanD", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""]], [["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "reduce_mean_d.so", "compute_cost": 10, "kernel_name": "reduce_mean_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "reduce"} +{"op_name": "ScatterNd", "inputs": [{"index": 0, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "shape", "param_type": "optional", "type": "listInt", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "scatter_nd_d.so", 
"compute_cost": 10, "kernel_name": "scatter_nd_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ScatterNdD", "inputs": [{"index": 0, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "shape", "param_type": "optional", "type": "listInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "scatter_nd_d.so", "compute_cost": 10, "kernel_name": "scatter_nd_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ReduceMean", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""]], [["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "reduce_mean.so", "compute_cost": 10, "kernel_name": "reduce_mean", "partial_flag": true, "reshape_type": 
"", "dynamic_format": false, "op_pattern": "reduce"} +{"op_name": "Tile", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "multiples", "param_type": "optional", "type": "listInt", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "tile_d.so", "compute_cost": 10, "kernel_name": "tile_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "AtomicAddrClean", "inputs": [], "outputs": [], "attr": [{"name": "automic_add_mem_size", "param_type": "required", "type": "listUInt64", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [], "imply_type": "TBE", "async_flag": false, "binfile_name": "atomic_addr_clean.so", "compute_cost": 10, "kernel_name": "atomic_addr_clean", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "GatherV2", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int64", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int32", "NC1HWC0"], ["int8", "NC1HWC0"]], [["int8", "NC1HWC0"], ["int64", "NC1HWC0"], ["int8", 
"NC1HWC0"]], [["int8", "FracZ"], ["int32", "FracZ"], ["int8", "FracZ"]], [["int8", "FracZ"], ["int64", "FracZ"], ["int8", "FracZ"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int64", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["int32", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint8", "NC1HWC0"], ["int64", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint8", "FracZ"], ["int32", "FracZ"], ["uint8", "FracZ"]], [["uint8", "FracZ"], ["int64", "FracZ"], ["uint8", "FracZ"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int64", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "NC1HWC0"], ["int64", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "FracZ"], ["int32", "FracZ"], ["int32", "FracZ"]], [["int32", "FracZ"], ["int64", "FracZ"], ["int32", "FracZ"]], [["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "DefaultFormat"], ["int64", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["int32", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "NC1HWC0"], ["int64", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["int32", "FracZ"], ["float16", "FracZ"]], [["float16", "FracZ"], ["int64", "FracZ"], ["float16", "FracZ"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int64", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["int32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "NC1HWC0"], ["int64", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["int32", "FracZ"], ["float32", "FracZ"]], [["float32", "FracZ"], ["int64", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "gather_v2_d.so", 
"compute_cost": 10, "kernel_name": "gather_v2_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "GatherNd", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int64", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int64", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "DefaultFormat"], ["int64", "DefaultFormat"], ["float16", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int64", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int64", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["bool", "DefaultFormat"], ["int32", "DefaultFormat"], ["bool", "DefaultFormat"]], [["bool", "DefaultFormat"], ["int64", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "gather_nd.so", "compute_cost": 10, "kernel_name": "gather_nd", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BNTrainingReduce", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}], "outputs": [{"index": 0, "name": "sum", "need_compile": false, 
"param_type": "required", "shape": "all"}, {"index": 1, "name": "square_sum", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float32", ""], ["float32", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bn_training_reduce.so", "compute_cost": 10, "kernel_name": "bn_training_reduce", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "BNTrainingReduceGrad", "inputs": [{"index": 0, "name": "grads", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "x_norm", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 2, "name": "diff_scale", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "diff_offset", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "scale", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "batch_mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "batch_variance", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}], "attr": [{"name": "epsilon", "param_type": "optional", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bn_training_reduce_grad.so", "compute_cost": 10, "kernel_name": 
"bn_training_reduce_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "BNTrainingUpdate", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "sum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "square_sum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "scale", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "offset", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "variance", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "variance", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "batch_mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "batch_variance", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "factor", "param_type": "optional", "type": "float", "value": "all"}, {"name": "epsilon", "param_type": "optional", "type": "float", "value": "all"}, {"name": "isRef", "param_type": "optional", "type": "bool", "value": "all", "default_value": "true"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], 
[["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bn_training_update.so", "compute_cost": 10, "kernel_name": "bn_training_update", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BNTrainingUpdateGrad", "inputs": [{"index": 0, "name": "grads", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 2, "name": "batch_mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "batch_variance", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "diff_scale", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "diff_offset", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "epsilon", "param_type": "optional", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float16", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""]], [["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bn_training_update_grad.so", "compute_cost": 10, "kernel_name": "bn_training_update_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "BNInfer", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "scale", 
"need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "offset", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "variance", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}], "attr": [{"name": "epsilon", "param_type": "required", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bn_infer.so", "compute_cost": 10, "kernel_name": "bn_infer", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BNInferGrad", "inputs": [{"index": 0, "name": "grads", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "scale", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "batch_variance", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "x_backprop", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}], "attr": [{"name": "epsilon", "param_type": "optional", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": 
"bn_infer_grad.so", "compute_cost": 10, "kernel_name": "bn_infer_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Reciprocal", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "reciprocal.so", "compute_cost": 10, "kernel_name": "reciprocal", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "StridedSlice", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "begin", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "end", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "begin_mask", "param_type": "required", "type": "int", "value": "all"}, {"name": "end_mask", "param_type": "required", "type": "int", "value": "all"}, {"name": "ellipsis_mask", "param_type": "required", "type": "int", "value": "all"}, {"name": "new_axis_mask", "param_type": "required", "type": "int", "value": "all"}, {"name": "shrink_axis_mask", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], 
"imply_type": "TBE", "async_flag": false, "binfile_name": "strided_slice_d.so", "compute_cost": 10, "kernel_name": "strided_slice_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "StridedSliceGrad", "inputs": [{"index": 0, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "shapex", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "begin", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "end", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "begin_mask", "param_type": "optional", "type": "int", "value": "all"}, {"name": "end_mask", "param_type": "optional", "type": "int", "value": "all"}, {"name": "ellipsis_mask", "param_type": "optional", "type": "int", "value": "all"}, {"name": "new_axis_mask", "param_type": "optional", "type": "int", "value": "all"}, {"name": "shrink_axis_mask", "param_type": "optional", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "strided_slice_grad_d.so", "compute_cost": 10, "kernel_name": "strided_slice_grad_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Split", "inputs": [{"index": 0, "name": "value", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output", "need_compile": false, "param_type": 
"dynamic", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "int", "value": "all"}, {"name": "output_num", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "split_d.so", "compute_cost": 10, "kernel_name": "split_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "Exp", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "exp.so", "compute_cost": 10, "kernel_name": "exp", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Expm1", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "expm1.so", "compute_cost": 10, "kernel_name": "expm1", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Elu", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "alpha", "param_type": "optional", "type": "float", "value": "all", "default_value": "1.0"}], "fusion_type": "ELEMWISE", 
"dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "elu.so", "compute_cost": 10, "kernel_name": "elu", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "EluGrad", "inputs": [{"index": 0, "name": "grads", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "activations", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "elu_grad.so", "compute_cost": 10, "kernel_name": "elu_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Div", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int8", ""], ["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""], ["uint8", ""]], 
[["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "div.so", "compute_cost": 10, "kernel_name": "div", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "Log", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "log.so", "compute_cost": 10, "kernel_name": "log", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "FloorDiv", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int8", ""], ["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""], ["uint8", ""]], [["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "floordiv.so", "compute_cost": 10, "kernel_name": "floordiv", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "ZerosLike", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": 
"all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["bool", ""], ["bool", ""]], [["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""]], [["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "zeros_like.so", "compute_cost": 10, "kernel_name": "zeros_like", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Neg", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "neg.so", "compute_cost": 10, "kernel_name": "neg", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "NPUClearFloatStatus", "inputs": [{"index": 0, "name": "addr", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "data", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "n_p_u_clear_float_status.so", "compute_cost": 10, "kernel_name": "n_p_u_clear_float_status", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "NPUGetFloatStatus", "inputs": [{"index": 0, "name": "addr", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "data", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": 
[[["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "n_p_u_get_float_status.so", "compute_cost": 10, "kernel_name": "n_p_u_get_float_status", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "NPUAllocFloatStatus", "inputs": [], "outputs": [{"index": 0, "name": "data", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "n_p_u_alloc_float_status.so", "compute_cost": 10, "kernel_name": "n_p_u_alloc_float_status", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "OneHot", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "on_value", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "off_value", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "depth", "param_type": "required", "type": "int", "value": "all"}, {"name": "axis", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["uint8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], 
[["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "one_hot.so", "compute_cost": 10, "kernel_name": "one_hot", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Equal", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int8", ""], ["int8", ""], ["bool", ""]], [["uint8", ""], ["uint8", ""], ["bool", ""]], [["int32", ""], ["int32", ""], ["bool", ""]], [["float16", ""], ["float16", ""], ["bool", ""]], [["float32", ""], ["float32", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "equal.so", "compute_cost": 10, "kernel_name": "equal", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "Less", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int8", 
""], ["int8", ""], ["bool", ""]], [["uint8", ""], ["uint8", ""], ["bool", ""]], [["int32", ""], ["int32", ""], ["bool", ""]], [["float16", ""], ["float16", ""], ["bool", ""]], [["float32", ""], ["float32", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "less.so", "compute_cost": 10, "kernel_name": "less", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "LessEqual", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "begin_norm_axis", "param_type": "required", "type": "int", "value": "all"}, {"name": "begin_params_axis", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", ""], ["int8", ""], ["bool", ""]], [["uint8", ""], ["uint8", ""], ["bool", ""]], [["int32", ""], ["int32", ""], ["bool", ""]], [["float16", ""], ["float16", ""], ["bool", ""]], [["float32", ""], ["float32", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "less_equal.so", "compute_cost": 10, "kernel_name": "less_equal", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "LogicalAnd", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["bool", ""], ["bool", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "logical_and.so", "compute_cost": 10, "kernel_name": "logical_and", 
"partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "LogicalNot", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["bool", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "logical_not.so", "compute_cost": 10, "kernel_name": "logical_not", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "LogicalOr", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["bool", ""], ["bool", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "logical_or.so", "compute_cost": 10, "kernel_name": "logical_or", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "ReduceMax", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["bool", ""], ["bool", ""]], [["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""]], [["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": 
"reduce_max_d.so", "compute_cost": 10, "kernel_name": "reduce_max_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "reduce"} +{"op_name": "ReduceMin", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "required", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""]], [["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "reduce_min_d.so", "compute_cost": 10, "kernel_name": "reduce_min_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "reduce"} +{"op_name": "ReduceSum", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "reduce_sum_d.so", "compute_cost": 10, "kernel_name": "reduce_sum_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "reduce"} +{"op_name": "Round", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", 
""], ["float16", ""]], [["float32", ""], ["float32", ""]], [["int32", ""], ["int32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "round.so", "compute_cost": 10, "kernel_name": "round", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Tanh", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "tanh.so", "compute_cost": 10, "kernel_name": "tanh", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "TanhGrad", "inputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "tanh_grad.so", "compute_cost": 10, "kernel_name": "tanh_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Softmax", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": 
"required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "listInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "softmax.so", "compute_cost": 10, "kernel_name": "softmax", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Softsign", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "softsign.so", "compute_cost": 10, "kernel_name": "softsign", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Softplus", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "softplus.so", "compute_cost": 10, "kernel_name": "softplus", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "SoftplusGrad", "inputs": [{"index": 0, "name": "gradients", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "features", "need_compile": 
false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "backprops", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "softplus_grad.so", "compute_cost": 10, "kernel_name": "softplus_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "SoftmaxGradExt", "inputs": [{"index": 0, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "keepdims", "param_type": "required", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""], ["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "softmax_grad_ext.so", "compute_cost": 10, "kernel_name": "softmax_grad_ext", "partial_flag": true, "reshape_type": "", "dynamic_format": true, "op_pattern": "dynamicFormat"} +{"op_name": "Square", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "square.so", "compute_cost": 10, "kernel_name": "square", "partial_flag": true, 
"reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Sqrt", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sqrt.so", "compute_cost": 10, "kernel_name": "sqrt", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "SparseApplyFtrl", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "linear", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "linear", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "lr", "param_type": "required", "type": "float", "value": "all"}, {"name": "l1", "param_type": "required", "type": "float", "value": "all"}, {"name": "l2", "param_type": "required", "type": "float", "value": "all"}, {"name": "lr_power", "param_type": "required", "type": "float", "value": "all"}, {"name": "use_locking", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "NCHW"], ["float32", "NCHW"], ["float32", 
"NCHW"], ["float32", "NCHW"], ["int32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]], [["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sparse_apply_ftrl.so", "compute_cost": 10, "kernel_name": "sparse_apply_ftrl", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SparseApplyProximalAdagrad", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "l1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "l2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["int16", "NCHW"], ["float32", "NCHW"], 
["float32", "NCHW"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["int16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["int16", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int16", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["int16", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["int32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["int32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["int32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "NCHW"], ["float32", "NCHW"], 
["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["int64", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["int64", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["int64", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int64", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["int64", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["uint16", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["uint16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["uint16", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["uint16", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], 
["float32", "FracZ"], ["uint16", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["uint32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["uint32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["uint32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["uint32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["uint32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["uint64", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["uint64", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["uint64", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["uint64", "DefaultFormat"], ["float32", "DefaultFormat"], 
["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["uint64", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sparse_apply_proximal_adagrad.so", "compute_cost": 10, "kernel_name": "sparse_apply_proximal_adagrad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApplyProximalAdagrad", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "l1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "l2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", 
"DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_proximal_adagrad_d.so", "compute_cost": 10, "kernel_name": "apply_proximal_adagrad_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Transpose", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "perm", "param_type": "optional", "type": "listInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], 
[["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "transpose_d.so", "compute_cost": 10, "kernel_name": "transpose_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "UnsortedSegmentSum", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "segment_ids", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "num_segments", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "unsorted_segment_sum_d.so", "compute_cost": 10, "kernel_name": "unsorted_segment_sum_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "UnsortedSegmentProd", "inputs": [{"index": 0, "name": "data", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "segment_ids", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "num_segments", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["int32", "DefaultFormat"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["int32", 
"DefaultFormat"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["int32", "DefaultFormat"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["int32", "DefaultFormat"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["int32", "DefaultFormat"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["int32", "DefaultFormat"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "DefaultFormat"], ["int32", "NC1HWC0"]], [["int32", "FracZ"], ["int32", "DefaultFormat"], ["int32", "FracZ"]], [["int32", "C1HWNCoC0"], ["int32", "DefaultFormat"], ["int32", "C1HWNCoC0"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "unsorted_segment_prod_d.so", "compute_cost": 10, "kernel_name": "unsorted_segment_prod_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "LogSoftmaxGrad", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "listInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "log_softmax_grad.so", "compute_cost": 10, "kernel_name": "log_softmax_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "LogSoftmax", "inputs": 
[{"index": 0, "name": "logits", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "logsoftmax", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "listInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "log_softmax.so", "compute_cost": 10, "kernel_name": "log_softmax", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Select", "inputs": [{"index": 0, "name": "condition", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""], ["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "select.so", "compute_cost": 10, "kernel_name": "select", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "Pow", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int8", ""], ["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""], ["uint8", ""]], [["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""]], [["float32", 
""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "pow.so", "compute_cost": 10, "kernel_name": "pow", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "Maximum", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "maximum.so", "compute_cost": 10, "kernel_name": "maximum", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "Minimum", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "minimum.so", "compute_cost": 10, "kernel_name": "minimum", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "MinimumGrad", "inputs": [{"index": 0, "name": "grads", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "x2", 
"need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y2", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "grad_x", "param_type": "optional", "type": "bool", "value": "all"}, {"name": "grad_y", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int32", ""], ["int32", ""], ["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "minimum_grad.so", "compute_cost": 10, "kernel_name": "minimum_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "MaximumGrad", "inputs": [{"index": 0, "name": "grads", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y2", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "grad_x", "param_type": "optional", "type": "bool", "value": "all"}, {"name": "grad_y", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int32", ""], ["int32", ""], ["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": 
"maximum_grad.so", "compute_cost": 10, "kernel_name": "maximum_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "Concat", "inputs": [{"index": 0, "name": "input_values", "need_compile": false, "param_type": "dynamic", "shape": "all"}], "outputs": [{"index": 0, "name": "output_data", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "concat_d.so", "compute_cost": 10, "kernel_name": "concat_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "Slice", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "begin", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "size", "param_type": "required", "type": "listInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "slice_d.so", "compute_cost": 10, "kernel_name": "slice_d", "partial_flag": true, "reshape_type": "", "dynamic_format": 
false, "op_pattern": ""} +{"op_name": "Sign", "inputs": [{"index": 0, "name": "x", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]], [["int32", ""], ["int32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sign.so", "compute_cost": 10, "kernel_name": "sign", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Greater", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int8", ""], ["int8", ""], ["bool", ""]], [["uint8", ""], ["uint8", ""], ["bool", ""]], [["int32", ""], ["int32", ""], ["bool", ""]], [["float16", ""], ["float16", ""], ["bool", ""]], [["float32", ""], ["float32", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "greater.so", "compute_cost": 10, "kernel_name": "greater", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "ClipByNormNoDivSum", "inputs": [{"index": 0, "name": "input_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "input1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "input2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "input3", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output_y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": 
"ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "clip_by_norm_no_div_sum.so", "compute_cost": 10, "kernel_name": "clip_by_norm_no_div_sum", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ClipByValue", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "clip_value_min", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "clip_value_max", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "dst_type", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["int32", ""], ["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "clip_by_value.so", "compute_cost": 10, "kernel_name": "clip_by_value", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "LayerNormBetaGammaBackprop", "inputs": [{"index": 0, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "variance", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "mean", "need_compile": false, "param_type": "required", 
"shape": "all"}], "outputs": [{"index": 0, "name": "pd_gamma", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "pd_beta", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "shape_gamma", "param_type": "required", "type": "listInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""], ["float32", ""], ["float32", ""]], [["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "layer_norm_beta_gamma_backprop.so", "compute_cost": 10, "kernel_name": "layer_norm_beta_gamma_backprop", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "LayerNorm", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "gamma", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "beta", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "variance", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "begin_norm_axis", "param_type": "required", "type": "int", "value": "all"}, {"name": "begin_params_axis", "param_type": "required", "type": "int", "value": "all"}, {"name": "epsilon", "param_type": "optional", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", 
"async_flag": false, "binfile_name": "layer_norm.so", "compute_cost": 10, "kernel_name": "layer_norm", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "LayerNormGrad", "inputs": [{"index": 0, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "variance", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "gamma", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "pd_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "pd_gamma", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "pd_beta", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, 
"binfile_name": "layer_norm_grad.so", "compute_cost": 10, "kernel_name": "layer_norm_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "LayerNormXBackprop", "inputs": [{"index": 0, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "variance", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "gamma", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "pd_x", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "layer_norm_x_backprop.so", "compute_cost": 10, "kernel_name": "layer_norm_x_backprop", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "L2Loss", "inputs": [{"index": 0, "name": "x", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "FracZ"], ["float16", "DefaultFormat"]], [["float16", "FRACTAL_NZ"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "DefaultFormat"]], [["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "DefaultFormat"]], [["float32", "FRACTAL_NZ"], 
["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "DefaultFormat"]], [["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "l2_loss.so", "compute_cost": 10, "kernel_name": "l2_loss", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "L2Normalize", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "epsilon", "param_type": "required", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "l2_normalize.so", "compute_cost": 10, "kernel_name": "l2_normalize", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "L2NormalizeGrad", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "dx", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "epsilon", "param_type": "required", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", 
"DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "l2_normalize_grad.so", "compute_cost": 10, "kernel_name": "l2_normalize_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SquareSumV1", "inputs": [{"index": 0, "name": "input_x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output1", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "square_sum_v1.so", "compute_cost": 10, "kernel_name": "square_sum_v1", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SquareSumV2", "inputs": [{"index": 0, "name": "input_x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "output2", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "square_sum_v2.so", "compute_cost": 10, "kernel_name": "square_sum_v2", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": 
""} +{"op_name": "ConfusionTransposeD", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "perm", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "shape", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "transpose_first", "param_type": "required", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "confusion_transpose_d.so", "compute_cost": 10, "kernel_name": "confusion_transpose_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "ConfusionSoftmaxGrad", "inputs": [{"index": 0, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "confusion_softmax_grad.so", "compute_cost": 10, "kernel_name": "confusion_softmax_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "LambUpdateWithLrV2", "inputs": [{"index": 0, 
"name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "x3", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "x4", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "x5", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "greater_y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "select_e", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "lamb_update_with_lr_v2.so", "compute_cost": 10, "kernel_name": "lamb_update_with_lr_v2", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "LambNextMV", "inputs": [{"index": 0, "name": "input1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "input2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "input3", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "input4", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "input5", 
"need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "input6", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "input7", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 7, "name": "input8", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 8, "name": "input9", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 9, "name": "inputx0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 10, "name": "inputx1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 11, "name": "inputx2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 12, "name": "inputx3", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "output2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "output3", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "output4", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", 
"DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "lamb_next_m_v.so", "compute_cost": 10, "kernel_name": "lamb_next_m_v", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "LambNextMVWithDecay", "inputs": [{"index": 0, "name": "input_mul3", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "input_mul2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "input_realdiv1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "input_mul1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "input_mul0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "input_realdiv0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "input_mul4", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 7, "name": "mul0_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 8, "name": "mul1_sub", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 9, "name": "mul2_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 10, "name": "mul3_sub1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 11, "name": "mul4_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 12, "name": "add2_y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y1", "need_compile": true, "param_type": 
"required", "shape": "all"}, {"index": 1, "name": "y2", "need_compile": true, "param_type": "required", "shape": "all"}, {"index": 2, "name": "y3", "need_compile": true, "param_type": "required", "shape": "all"}, {"index": 3, "name": "y4", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "lamb_next_m_v_with_decay.so", "compute_cost": 10, "kernel_name": "lamb_next_m_v_with_decay", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "LambUpdateWithLR", "inputs": [{"index": 0, "name": "input1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "input2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "input3", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "input4", 
"need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "input5", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "input6", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "input7", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 7, "name": "input8", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 8, "name": "input9", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output_y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "lamb_update_with_lr.so", "compute_cost": 10, "kernel_name": "lamb_update_with_lr", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Rsqrt", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "rsqrt.so", "compute_cost": 10, 
"kernel_name": "rsqrt", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Sigmoid", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sigmoid.so", "compute_cost": 10, "kernel_name": "sigmoid", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "SigmoidGrad", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sigmoid_grad.so", "compute_cost": 10, "kernel_name": "sigmoid_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ResizeNearestNeighbor", "inputs": [{"index": 0, "name": "images", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [{"name": "size", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "align_corners", 
"param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "NC1HWC0"], ["int8", "NC1HWC0"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "resize_nearest_neighbor_d.so", "compute_cost": 10, "kernel_name": "resize_nearest_neighbor_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ResizeNearestNeighborGrad", "inputs": [{"index": 0, "name": "grads", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "size", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "align_corners", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "resize_nearest_neighbor_grad_d.so", "compute_cost": 10, "kernel_name": "resize_nearest_neighbor_grad_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Pad", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "paddings", "param_type": "optional", "type": "listListInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", 
"DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "pad_d.so", "compute_cost": 10, "kernel_name": "pad_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ArgMaxWithValue", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "indice", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "values", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "arg_max_with_value.so", "compute_cost": 10, "kernel_name": "arg_max_with_value", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ArgMinWithValue", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "indice", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "values", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "arg_min_with_value.so", "compute_cost": 10, "kernel_name": "arg_min_with_value", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": 
"SmoothL1Loss", "inputs": [{"index": 0, "name": "predict", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "label", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "loss", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "sigma", "param_type": "required", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "smooth_l1_loss.so", "compute_cost": 10, "kernel_name": "smooth_l1_loss", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SmoothL1LossGrad", "inputs": [{"index": 0, "name": "predict", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "label", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "dout", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "loss", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "sigma", "param_type": "required", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], 
[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "smooth_l1_loss_grad.so", "compute_cost": 10, "kernel_name": "smooth_l1_loss_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "FusedMulAdd", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "x3", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""], ["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "fused_mul_add.so", "compute_cost": 10, "kernel_name": "fused_mul_add", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "FusedMulAddN", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "x3", "need_compile": false, "param_type": "required", "shape": 
"all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "fused_mul_add_n.so", "compute_cost": 10, "kernel_name": "fused_mul_add_n", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "FusedMulApplyMomentum", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "momentum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, 
"name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_nesterov", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", 
"async_flag": false, "binfile_name": "fused_mul_apply_momentum.so", "compute_cost": 10, "kernel_name": "fused_mul_apply_momentum", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Fill", "inputs": [{"index": 0, "name": "value", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "dims", "param_type": "required", "type": "listInt", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "FracZ"], ["int32", "FracZ"]], [["int32", "C1HWNCoC0"], ["int32", "C1HWNCoC0"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"]], [["int8", "FracZ"], ["int8", "FracZ"]], [["int8", "C1HWNCoC0"], ["int8", "C1HWNCoC0"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint8", "FracZ"], ["uint8", "FracZ"]], [["uint8", "C1HWNCoC0"], ["uint8", "C1HWNCoC0"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "fill_d.so", "compute_cost": 10, "kernel_name": "fill_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Erf", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", 
"shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "erf.so", "compute_cost": 10, "kernel_name": "erf", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Erfc", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "erfc.so", "compute_cost": 10, "kernel_name": "erfc", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "DepthwiseConv2dNative", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "filter", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "bias", "need_compile": false, "param_type": "optional", "shape": "all"}, {"index": 3, "name": "offset_w", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [{"name": "stride", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "dilation", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pads", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "data_format", "param_type": "required", "type": "str", "value": "all"}, {"name": "offset_a", "param_type": "optional", "type": "int", "value": "all"}], "fusion_type": "CONVLUTION", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "C1HWNCoC0"], ["float16", 
"DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "depthwise_conv2d.so", "compute_cost": 10, "kernel_name": "depthwise_conv2d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "DepthwiseConv2dNativeBackpropFilter", "inputs": [{"index": 0, "name": "input", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "out_backprop", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "filter_grad", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "filter_size", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "stride", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "dilation", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pads", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "data_format", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "CONVLUTION", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float32", "C1HWNCoC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "depthwise_conv2d_backprop_filter_d.so", "compute_cost": 10, "kernel_name": "depthwise_conv2d_backprop_filter_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "DepthwiseConv2dNativeBackpropInput", "inputs": [{"index": 0, "name": "filter", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "out_backprop", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "input_grad", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "input_size", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "stride", "param_type": "required", "type": 
"listInt", "value": "all"}, {"name": "dilation", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pads", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "data_format", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "CONVLUTION", "dtype_format": [[["float16", "C1HWNCoC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "depthwise_conv2d_backprop_input_d.so", "compute_cost": 10, "kernel_name": "depthwise_conv2d_backprop_input_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "GreaterEqual", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int8", ""], ["int8", ""], ["bool", ""]], [["uint8", ""], ["uint8", ""], ["bool", ""]], [["int32", ""], ["int32", ""], ["bool", ""]], [["float16", ""], ["float16", ""], ["bool", ""]], [["float32", ""], ["float32", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "greater_equal.so", "compute_cost": 10, "kernel_name": "greater_equal", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "NotEqual", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int8", ""], ["int8", ""], ["bool", ""]], [["uint8", ""], ["uint8", ""], ["bool", ""]], [["int32", ""], ["int32", 
""], ["bool", ""]], [["float16", ""], ["float16", ""], ["bool", ""]], [["float32", ""], ["float32", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "not_equal.so", "compute_cost": 10, "kernel_name": "not_equal", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "FloorMod", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]], [["int32", ""], ["int32", ""], ["int32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "floor_mod.so", "compute_cost": 10, "kernel_name": "floor_mod", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "ScatterNdUpdate", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "updates", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"],
["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["bool", "DefaultFormat"], ["int32", "DefaultFormat"], ["bool", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "scatter_nd_update.so", "compute_cost": 10, "kernel_name": "scatter_nd_update", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "AvgPool", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "ksize", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "padding", "param_type": "required", "type": "str", "value": "all"}, {"name": "data_format", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "avg_pool.so", "compute_cost": 10, "kernel_name": "avg_pool", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "AvgPoolGrad", "inputs": [{"index": 0, "name": "input_grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "mean_matrix", "need_compile": false, "param_type": "optional", "shape": "all"}, {"index": 2, "name": "kernel_matrix", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "out_grad", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [{"name": "x_origin", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "ksize", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", 
"param_type": "required", "type": "listInt", "value": "all"}, {"name": "padding", "param_type": "required", "type": "str", "value": "all"}, {"name": "data_format", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "C1HWNCoC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "avg_pool_grad_d.so", "compute_cost": 10, "kernel_name": "avg_pool_grad_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "OnesLike", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["uint8", ""], ["uint8", ""]], [["int8", ""], ["int8", ""]], [["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "ones_like.so", "compute_cost": 10, "kernel_name": "ones_like", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "BatchToSpace", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "block_size", "param_type": "required", "type": "int", "value": "all"}, {"name": "crops", "param_type": "required", "type": "listListInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "batch_to_space_d.so", "compute_cost": 10, "kernel_name": "batch_to_space_d", "partial_flag": true, "reshape_type": "", 
"dynamic_format": false, "op_pattern": ""} +{"op_name": "SpaceToBatch", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "block_size", "param_type": "required", "type": "int", "value": "all"}, {"name": "paddings", "param_type": "required", "type": "listListInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "space_to_batch_d.so", "compute_cost": 10, "kernel_name": "space_to_batch_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "DepthToSpace", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "block_size", "param_type": "required", "type": "int", "value": "all"}, {"name": "data_format", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NHWC"], ["float16", "NHWC"]], [["float32", "NHWC"], ["float32", "NHWC"]], [["int8", "NHWC"], ["int8", "NHWC"]], [["int16", "NHWC"], ["int16", "NHWC"]], [["int32", "NHWC"], ["int32", "NHWC"]], [["int64", "NHWC"], ["int64", "NHWC"]], [["uint8", "NHWC"], ["uint8", "NHWC"]], [["uint16", "NHWC"], ["uint16", "NHWC"]], [["uint32", "NHWC"], ["uint32", "NHWC"]], [["uint64", "NHWC"], ["uint64", "NHWC"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "depth_to_space.so", "compute_cost": 10, "kernel_name": "depth_to_space", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SpaceToDepth", "inputs": [{"index": 0, "name": "x", "need_compile": false, 
"param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "block_size", "param_type": "required", "type": "int", "value": "all"}, {"name": "data_format", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NHWC"], ["float16", "NHWC"]], [["float32", "NHWC"], ["float32", "NHWC"]], [["int8", "NHWC"], ["int8", "NHWC"]], [["int16", "NHWC"], ["int16", "NHWC"]], [["int32", "NHWC"], ["int32", "NHWC"]], [["int64", "NHWC"], ["int64", "NHWC"]], [["uint8", "NHWC"], ["uint8", "NHWC"]], [["uint16", "NHWC"], ["uint16", "NHWC"]], [["uint32", "NHWC"], ["uint32", "NHWC"]], [["uint64", "NHWC"], ["uint64", "NHWC"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "space_to_depth.so", "compute_cost": 10, "kernel_name": "space_to_depth", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Floor", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "floor.so", "compute_cost": 10, "kernel_name": "floor", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Ceil", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "ceil.so", 
"compute_cost": 10, "kernel_name": "ceil", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Log1p", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "log1p.so", "compute_cost": 10, "kernel_name": "log1p", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "ResizeBilinear", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "size", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "align_corners", "param_type": "optional", "type": "bool", "value": "all"}, {"name": "half_pixel_centers", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "resize_bilinear_v2_d.so", "compute_cost": 10, "kernel_name": "resize_bilinear_v2_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ResizeBilinearGrad", "inputs": [{"index": 0, "name": "grads", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "original_image", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "align_corners", "param_type": 
"optional", "type": "bool", "value": "all"}, {"name": "half_pixel_centers", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "resize_bilinear_v2_grad.so", "compute_cost": 10, "kernel_name": "resize_bilinear_v2_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Flatten", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "flatten.so", "compute_cost": 10, "kernel_name": "flatten", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ROIAlign", "inputs": [{"index": 0, "name": "features", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "rois", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "rois_n", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": 
"all"}], "attr": [{"name": "spatial_scale", "param_type": "required", "type": "float", "value": "all"}, {"name": "pooled_height", "param_type": "required", "type": "int", "value": "all"}, {"name": "pooled_width", "param_type": "required", "type": "int", "value": "all"}, {"name": "sample_num", "param_type": "optional", "type": "int", "value": "all", "default_value": "2"}, {"name": "roi_end_mode", "param_type": "optional", "type": "0,1", "value": "1"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "roi_align.so", "compute_cost": 10, "kernel_name": "roi_align", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ROIAlignGrad", "inputs": [{"index": 0, "name": "ydiff", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "rois", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "rois_n", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "xdiff", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "xdiff_shape", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pooled_width", "param_type": "required", "type": "int", "value": "all"}, {"name": "pooled_height", "param_type": "required", "type": "int", "value": "all"}, {"name": "spatial_scale", "param_type": "required", "type": "float", "value": "all"}, {"name": "sample_num", "param_type": "optional", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": 
"roi_align_grad.so", "compute_cost": 10, "kernel_name": "roi_align_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BoundingBoxDecode", "inputs": [{"index": 0, "name": "rois", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "deltas", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "bboxes", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "means", "param_type": "optional", "type": "listFloat", "value": "all"}, {"name": "stds", "param_type": "optional", "type": "listFloat", "value": "all"}, {"name": "max_shape", "param_type": "optional", "type": "listInt", "value": "all"}, {"name": "wh_ratio_clip", "param_type": "optional", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bounding_box_decode.so", "compute_cost": 10, "kernel_name": "bounding_box_decode", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BoundingBoxEncode", "inputs": [{"index": 0, "name": "anchor_box", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "ground_truth_box", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "delats", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "means", "param_type": "optional", "type": "listFloat", "value": "all"}, {"name": "stds", "param_type": "optional", "type": "listFloat", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]]], "imply_type": "TBE", 
"async_flag": false, "binfile_name": "bounding_box_encode.so", "compute_cost": 10, "kernel_name": "bounding_box_encode", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "CheckValid", "inputs": [{"index": 0, "name": "bbox_tensor", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "img_tas", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "valid_tensor", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float16", ""], ["int8", ""]], [["float16", ""], ["float16", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "check_valid.so", "compute_cost": 10, "kernel_name": "check_valid", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "IOU", "inputs": [{"index": 0, "name": "bboxes", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "gtboxes", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "overlap", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "mode", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "iou.so", "compute_cost": 10, "kernel_name": "iou", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Argmax", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "int", "value": 
"all"}, {"name": "output_dtype", "param_type": "optional", "type": "type", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "arg_max_d.so", "compute_cost": 10, "kernel_name": "arg_max_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "NMSWithMask", "inputs": [{"index": 0, "name": "box_scores", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "selected_boxes", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "selected_idx", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "selected_mask", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "iou_threshold", "param_type": "optional", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "nms_with_mask.so", "compute_cost": 10, "kernel_name": "nms_with_mask", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SGD", "inputs": [{"index": 0, "name": "parameters", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "gradient", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "learning_rate", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "momentum", "need_compile":
false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "stat", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "parameters", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "dampening", "param_type": "optional", "type": "float", "value": "all"}, {"name": "weight_decay", "param_type": "optional", "type": "float", "value": "all"}, {"name": "nesterov", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sgd.so", "compute_cost": 10, "kernel_name": "sgd", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "LARSUpdate", "inputs": [{"index": 0, "name": "w", "need_compile": false, "param_type": "required", 
"shape": "all"}, {"index": 1, "name": "g", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "w_square_sum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "g_square_sum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "weight_decay", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "learning_rate", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "g_new", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "hyperpara", "param_type": "optional", "type": "float", "value": "all"}, {"name": "epsilon", "param_type": "optional", "type": "float", "value": "all"}, {"name": "use_clip", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "lars_v2_update.so", "compute_cost": 10, "kernel_name": "lars_v2_update", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Argmin", "inputs": [{"index": 0, "name": "x", 
"need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "int", "value": "all"}, {"name": "output_dtype", "param_type": "optional", "type": "type", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "arg_min_d.so", "compute_cost": 10, "kernel_name": "arg_min_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BNTrainingUpdateV2", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "sum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "square_sum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "scale", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "offset", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "batch_mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "batch_variance", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "epsilon", "param_type": "required", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float16", ""], ["float32", ""], ["float32", ""]], [["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", 
""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bn_training_update_v2.so", "compute_cost": 10, "kernel_name": "bn_training_update_v2", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "BNTrainingUpdateV3", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "sum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "square_sum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "scale", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "offset", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "batch_mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "batch_variance", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "reserve_1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "reserve_2", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "epsilon", "param_type": "required", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, 
"binfile_name": "bn_training_update_v3.so", "compute_cost": 10, "kernel_name": "bn_training_update_v3", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SquareSumAll", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y2", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "square_sum_all.so", "compute_cost": 10, "kernel_name": "square_sum", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Pack", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "dynamic", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", 
"DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int8", "NDHWC"], ["int8", "NDHWC"]], [["int16", "NDHWC"], ["int16", "NDHWC"]], [["int32", "NDHWC"], ["int32", "NDHWC"]], [["int64", "NDHWC"], ["int64", "NDHWC"]], [["uint8", "NDHWC"], ["uint8", "NDHWC"]], [["uint16", "NDHWC"], ["uint16", "NDHWC"]], [["uint32", "NDHWC"], ["uint32", "NDHWC"]], [["uint64", "NDHWC"], ["uint64", "NDHWC"]], [["float16", "NDHWC"], ["float16", "NDHWC"]], [["float32", "NDHWC"], ["float32", "NDHWC"]], [["bool", "NDHWC"], ["bool", "NDHWC"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "pack.so", "compute_cost": 10, "kernel_name": "pack", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Unpack", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "dynamic", "shape": "all"}], "attr": [{"name": "num", "param_type": "optional", "type": "int", "value": "all"}, {"name": "axis", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", 
"DefaultFormat"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"]], [["int16", "NC1HWC0"], ["int16", "NC1HWC0"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int64", "NC1HWC0"], ["int64", "NC1HWC0"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint16", "NC1HWC0"], ["uint16", "NC1HWC0"]], [["uint32", "NC1HWC0"], ["uint32", "NC1HWC0"]], [["uint64", "NC1HWC0"], ["uint64", "NC1HWC0"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "unpack.so", "compute_cost": 10, "kernel_name": "unpack", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ScatterUpdate", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "updates", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["bool", "DefaultFormat"], ["int32", "DefaultFormat"], ["bool", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "scatter_update.so", "compute_cost": 10, "kernel_name": "scatter_update", "partial_flag": true, 
"reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "PReLU", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "weight", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NCHW"], ["float16", "DefaultFormat"], ["float16", "NCHW"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NCHW"], ["float32", "DefaultFormat"], ["float32", "NCHW"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "prelu.so", "compute_cost": 10, "kernel_name": "prelu", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "PReLUGrad", "inputs": [{"index": 0, "name": "grads", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "features", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "weights", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "dx", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 0, "name": "da", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float32", "NCHW"], ["float32", "NCHW"], ["float32", "DefaultFormat"], ["float32", "NCHW"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], 
["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "prelu_grad.so", "compute_cost": 10, "kernel_name": "prelu_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BinaryCrossEntropy", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "weight", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "output", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "reduction", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""], ["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "binary_cross_entropy.so", "compute_cost": 10, "kernel_name": "binary_cross_entropy", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "BinaryCrossEntropyGrad", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "grad_output", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "weight", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "output", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "reduction", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", 
"DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "binary_cross_entropy_grad.so", "compute_cost": 10, "kernel_name": "binary_cross_entropy_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Sin", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sin.so", "compute_cost": 10, "kernel_name": "sin", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Cos", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "cos.so", "compute_cost": 10, "kernel_name": "cos", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "CumSum", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": 
"required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "int", "value": "all", "default_value": "0"}, {"name": "exclusive", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "fales"}, {"name": "reverse", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}], "fusion_type": "OPAQUE", "dtype_format": [[["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "cumsum_d.so", "compute_cost": 10, "kernel_name": "cumsum_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ApplyRMSProp", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "ms", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "mom", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "ms", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "mom", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "rho", "param_type": "required", "type": "float", "value": "all"}, {"name": "momentum", "param_type": "required", "type": "float", "value": "all"}, {"name": "epsilon", "param_type": "required", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": 
[[["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "apply_rms_prop.so", "compute_cost": 10, "kernel_name": "apply_rms_prop_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "CumProd", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "int", "value": "all"}, {"name": "exclusive", "param_type": "optional", "type": "bool", "value": "all"}, {"name": "reverse", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "cumprod_d.so", "compute_cost": 10, "kernel_name": 
"cumprod_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ReduceProd", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""]], [["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "reduce_prod_d.so", "compute_cost": 10, "kernel_name": "reduce_prod_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "reduce"} +{"op_name": "FlattenGrad", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "shape", "param_type": "required", "type": "listInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "reshape.so", "compute_cost": 10, "kernel_name": "reshape", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ScatterAdd", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "updates", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": 
[{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "scatter_add.so", "compute_cost": 10, "kernel_name": "scatter_add", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Atan2", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "atan2.so", "compute_cost": 10, "kernel_name": "atan2", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "BesselI0e", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", 
"dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bessel_i0e.so", "compute_cost": 10, "kernel_name": "bessel_i0e", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "BesselI1e", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bessel_i1e.so", "compute_cost": 10, "kernel_name": "bessel_i1e", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "BatchToSpaceND", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NH"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NH"}], "attr": [{"name": "block_shape", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "crops", "param_type": "required", "type": "listListInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "batch_to_space_nd_d.so", "compute_cost": 10, "kernel_name": "batch_to_space_nd_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SpaceToBatchND", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NH"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all", 
"reshape_type": "NH"}], "attr": [{"name": "block_shape", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "paddings", "param_type": "required", "type": "listListInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "space_to_batch_nd_d.so", "compute_cost": 10, "kernel_name": "space_to_batch_nd_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BitwiseAnd", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int16", ""], ["int16", ""], ["int16", ""]], [["uint16", ""], ["uint16", ""], ["uint16", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bitwise_and.so", "compute_cost": 10, "kernel_name": "bitwise_and", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "BitwiseOr", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int16", ""], ["int16", ""], ["int16", ""]], [["uint16", ""], ["uint16", ""], ["uint16", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bitwise_or.so", "compute_cost": 10, "kernel_name": "bitwise_or", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": 
"BitwiseXor", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int16", ""], ["int16", ""], ["int16", ""]], [["uint16", ""], ["uint16", ""], ["uint16", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bitwise_xor.so", "compute_cost": 10, "kernel_name": "bitwise_xor", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "ReduceAll", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "keep_dims", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["bool", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "reduce_all_d.so", "compute_cost": 10, "kernel_name": "reduce_all_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "reduce"} +{"op_name": "SparseApplyAdagrad", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": 
"required", "shape": "all"}], "attr": [{"name": "lr", "param_type": "required", "type": "float", "value": "all"}, {"name": "update_slots", "param_type": "optional", "type": "bool", "value": "all"}, {"name": "use_locking", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["int32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]], [["float32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"], ["int32", "NHWC"], ["float32", "NHWC"], ["float32", "NHWC"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sparse_apply_adagrad_d.so", "compute_cost": 10, "kernel_name": "sparse_apply_adagrad_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "UnsortedSegmentMin", "inputs": [{"index": 0, "name": "data", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "segment_ids", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "num_segments", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["int32", "DefaultFormat"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["int32", "DefaultFormat"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["int32", "DefaultFormat"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["int32", "DefaultFormat"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["int32", "DefaultFormat"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["int32", 
"DefaultFormat"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "DefaultFormat"], ["int32", "NC1HWC0"]], [["int32", "FracZ"], ["int32", "DefaultFormat"], ["int32", "FracZ"]], [["int32", "C1HWNCoC0"], ["int32", "DefaultFormat"], ["int32", "C1HWNCoC0"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "unsorted_segment_min_d.so", "compute_cost": 10, "kernel_name": "unsorted_segment_min_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Asin", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "asin.so", "compute_cost": 10, "kernel_name": "asin", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "AsinGrad", "inputs": [{"index": 0, "name": "y", "param_type": "required", "shape": "all"}, {"index": 1, "name": "dy", "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], 
["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "asin_grad.so", "compute_cost": 10, "kernel_name": "asin_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Asinh", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "asinh.so", "compute_cost": 10, "kernel_name": "asinh", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "AsinhGrad", "inputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", 
"DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "asinh_grad.so", "compute_cost": 10, "kernel_name": "asinh_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "DivNoNan", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int8", ""], ["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""], ["uint8", ""]], [["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "div_no_nan.so", "compute_cost": 10, "kernel_name": "div_no_nan", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "Atan", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "atan.so", "compute_cost": 10, "kernel_name": "atan", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "AtanGrad", "inputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "dy", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": 
"ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["float16", "FRACTAL_NZ"], ["float16", "FracZ"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["float32", "FRACTAL_NZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "atan_grad.so", "compute_cost": 10, "kernel_name": "atan_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Atanh", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "atanh.so", "compute_cost": 10, "kernel_name": "atanh", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Cosh", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "cosh.so", "compute_cost": 10, "kernel_name": "cosh", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": 
"formatAgnostic"} +{"op_name": "Sinh", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "sinh.so", "compute_cost": 10, "kernel_name": "sinh", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "Inv", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int32", ""], ["int32", ""]], [["float32", ""], ["float32", ""]], [["float16", ""], ["float16", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "inv.so", "compute_cost": 10, "kernel_name": "inv", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "InvGrad", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]], [["int32", ""], ["int32", ""], ["int32", ""]], [["int8", ""], ["int8", ""], ["int8", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "inv_grad.so", "compute_cost": 10, "kernel_name": "inv_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": 
"Invert", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int16", ""], ["int16", ""]], [["uint16", ""], ["uint16", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "invert.so", "compute_cost": 10, "kernel_name": "invert", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "formatAgnostic"} +{"op_name": "BasicLSTMCell", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "h", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "c", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "w", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "b", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "mask", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "ct", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "ht", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "it", "need_compile": false, "param_type": "optional", "shape": "all"}, {"index": 3, "name": "jt", "need_compile": false, "param_type": "optional", "shape": "all"}, {"index": 4, "name": "ft", "need_compile": false, "param_type": "optional", "shape": "all"}, {"index": 5, "name": "ot", "need_compile": false, "param_type": "optional", "shape": "all"}, {"index": 6, "name": "tanhct", "need_compile": false, "param_type": "optional", "shape": "all"}], "attr": [{"name": "keep_prob", "param_type": "optional", "type": "float", "value": "all"}, {"name": "forget_bias", "param_type": "optional", "type": "float", "value": "all"}, 
{"name": "state_is_tuple", "param_type": "optional", "type": "bool", "value": "true"}, {"name": "activation", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float16", "FracZ"], ["float32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["float32", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["uint8", "DefaultFormat"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "basic_lstm_cell.so", "compute_cost": 10, "kernel_name": "basic_lstm_cell", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BasicLSTMCellCStateGrad", "inputs": [{"index": 0, "name": "c", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "dht", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "dct", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "it", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "jt", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "ft", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "ot", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 7, "name": "tanhct", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "dgate", "need_compile": 
false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "dct_1", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "forget_bias", "param_type": "optional", "type": "float", "value": "all"}, {"name": "activation", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "basic_lstm_cell_c_state_grad.so", "compute_cost": 10, "kernel_name": "basic_lstm_cell_c_state_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BasicLSTMCellWeightGrad", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "h", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "dgate", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "dw", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "db", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FracZ"], ["float32", "DefaultFormat"]], [["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"], ["float16", "FracZ"], ["float16", 
"DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "basic_lstm_cell_weight_grad.so", "compute_cost": 10, "kernel_name": "basic_lstm_cell_weight_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BasicLSTMCellInputGrad", "inputs": [{"index": 0, "name": "dgate", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "w", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "dropout_mask", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "dxt", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "dht", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "keep_prob", "param_type": "optional", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "FRACTAL_NZ"], ["float16", "FracZ"], ["uint8", "DefaultFormat"], ["float32", "FRACTAL_NZ"], ["float32", "FRACTAL_NZ"]], [["float16", "FRACTAL_NZ"], ["float16", "FracZ"], ["uint8", "DefaultFormat"], ["float16", "FRACTAL_NZ"], ["float16", "FRACTAL_NZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "basic_lstm_cell_input_grad.so", "compute_cost": 10, "kernel_name": "basic_lstm_cell_input_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ConfusionMatrix", "inputs": [{"index": 0, "name": "labels", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "predictions", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "weights", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "num_classes", "param_type": "required", "type": "int", "value": "all"}, {"name": 
"dtype", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], 
[["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "confusion_matrix.so", "compute_cost": 10, "kernel_name": "confusion_matrix", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "BroadcastTo", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "shape", "param_type": "required", "type": "listInt", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "broadcast_to_d.so", 
"compute_cost": 10, "kernel_name": "broadcast_to_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "StridedRead", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "int", "value": "all"}, {"name": "stride", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "strided_read.so", "compute_cost": 10, "kernel_name": "strided_read", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "StridedWrite", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "required", "type": "int", "value": "all"}, {"name": "stride", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "strided_write.so", "compute_cost": 10, "kernel_name": "strided_write", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Range", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "start", "param_type": "required", "type": "float", "value": "all"}, {"name": "limit", "param_type": 
"required", "type": "float", "value": "all"}, {"name": "delta", "param_type": "required", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "range_d.so", "compute_cost": 10, "kernel_name": "range_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "FusedMulAddNL2loss", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "x3", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y2", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"]], [["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], 
["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "fused_mul_addn_l2loss.so", "compute_cost": 10, "kernel_name": "fused_mul_addn_l2loss", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "FusedMulApplyMomentumExtern", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "lr", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "momentum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "var_copy", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "var_copy", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "accum", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_nesterov", "param_type": "optional", "type": "bool", "value": "true,false", "default_value": "false"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float16", 
"DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float16", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "FracZ"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "FracZ"], ["float32", "FracZ"], ["float16", "FracZ"], ["float16", "FracZ"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "NC1HWC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float16", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "C1HWNCoC0"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float16", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float16", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "FracZ"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "FracZ"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float16", "FracZ"], ["float32", "FracZ"], ["float16", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "fused_mul_apply_momentum_extern.so", "compute_cost": 10, "kernel_name": "fused_mul_apply_momentum_extern", "partial_flag": true, "reshape_type": "", 
"dynamic_format": false, "op_pattern": ""} +{"op_name": "LambNextRight", "inputs": [{"index": 0, "name": "input_square", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "input_mul2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "mul2_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "mul3_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "truediv1_recip", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "add2_y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y2", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "lamb_next_right.so", "compute_cost": 10, "kernel_name": "lamb_next_right", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SparseGatherV2", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": 
"axis", "param_type": "optional", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int64", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int32", "NC1HWC0"], ["int8", "NC1HWC0"]], [["int8", "NC1HWC0"], ["int64", "NC1HWC0"], ["int8", "NC1HWC0"]], [["int8", "FracZ"], ["int32", "FracZ"], ["int8", "FracZ"]], [["int8", "FracZ"], ["int64", "FracZ"], ["int8", "FracZ"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int64", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["int32", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint8", "NC1HWC0"], ["int64", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint8", "FracZ"], ["int32", "FracZ"], ["uint8", "FracZ"]], [["uint8", "FracZ"], ["int64", "FracZ"], ["uint8", "FracZ"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int64", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "NC1HWC0"], ["int64", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "FracZ"], ["int32", "FracZ"], ["int32", "FracZ"]], [["int32", "FracZ"], ["int64", "FracZ"], ["int32", "FracZ"]], [["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "DefaultFormat"], ["int64", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["int32", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "NC1HWC0"], ["int64", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "FracZ"], ["int32", "FracZ"], ["float16", "FracZ"]], [["float16", "FracZ"], ["int64", "FracZ"], ["float16", "FracZ"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int64", "DefaultFormat"], 
["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["int32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "NC1HWC0"], ["int64", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "FracZ"], ["int32", "FracZ"], ["float32", "FracZ"]], [["float32", "FracZ"], ["int64", "FracZ"], ["float32", "FracZ"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "gather_v2_d.so", "compute_cost": 10, "kernel_name": "gather_v2_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "DataFormatDimMap", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "dst_format", "param_type": "optional", "type": "str", "value": "all"}, {"name": "src_format", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "data_format_dim_map.so", "compute_cost": 10, "kernel_name": "data_format_dim_map", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "HistogramFixedWidth", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "range", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "nbins", "param_type": "required", "type": "int", "value": "all"}, {"name": "dtype", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["int32", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", 
"DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "histogram_fixed_width_d.so", "compute_cost": 10, "kernel_name": "histogram_fixed_width_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "TensorScatterUpdate", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "updates", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "tensor_scatter_update.so", "compute_cost": 10, "kernel_name": "tensor_scatter_update", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "InplaceUpdate", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "v", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": 
"all"}], "attr": [{"name": "indices", "param_type": "required", "type": "listInt", "value": "all"}], "fusion_type": "INPLACE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "inplace_update_d.so", "compute_cost": 10, "kernel_name": "inplace_update_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "SplitV", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "dynamic", "shape": "all"}], "attr": [{"name": "size_splits", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "split_dim", "param_type": "required", "type": "int", "value": "all"}, {"name": "num_split", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "split_v_d.so", "compute_cost": 10, "kernel_name": "split_v_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "dynamicFormat"} +{"op_name": "InTopK", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "k", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, 
"binfile_name": "in_top_k.so", "compute_cost": 10, "kernel_name": "in_top_k", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "LinSpace", "inputs": [{"index": 0, "name": "assist", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "start", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "stop", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "num", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float32", ""], ["float32", ""], ["float32", ""], ["int32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "lin_space.so", "compute_cost": 10, "kernel_name": "lin_space", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "MatrixDiag", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "assist", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"], ["int8", "NC1HWC0"]], [["int8", 
"DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "matrix_diag_d.so", "compute_cost": 10, "kernel_name": "matrix_diag_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "MatrixDiagPart", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "assist", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"], ["int8", "NC1HWC0"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "matrix_diag_part_d.so", "compute_cost": 10, "kernel_name": "matrix_diag_part_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "MatrixSetDiag", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, 
"name": "diagonal", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "assist", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"], ["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"], ["int8", "NC1HWC0"], ["int8", "NC1HWC0"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "matrix_set_diag_d.so", "compute_cost": 10, "kernel_name": "matrix_set_diag_d", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "LRN", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "depth_radius", "param_type": "optional", "type": "int", "value": "all", "default_value": "5"}, {"name": "bias", "param_type": "optional", "type": 
"float", "value": "all", "default_value": "1.0"}, {"name": "alpha", "param_type": "optional", "type": "float", "value": "all", "default_value": "1.0"}, {"name": "beta", "param_type": "optional", "type": "float", "value": "all", "default_value": "0.5"}, {"name": "norm_region", "param_type": "optional", "type": "str", "value": "all", "default_value": "ACROSS_CHANNELS"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NCHW"], ["float16", "NCHW"]], [["float32", "NCHW"], ["float32", "NCHW"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "lrn.so", "compute_cost": 10, "kernel_name": "lrn", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "LRNGrad", "inputs": [{"index": 0, "name": "grads", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "z", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "depth_radius", "param_type": "optional", "type": "int", "value": "all"}, {"name": "bias", "param_type": "optional", "type": "float", "value": "all"}, {"name": "alpha", "param_type": "optional", "type": "float", "value": "all"}, {"name": "beta", "param_type": "optional", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NCHW"], ["float16", "NCHW"], ["float16", "NCHW"], ["float16", "NCHW"]], [["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"], ["float32", "NCHW"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "lrn_grad.so", "compute_cost": 10, "kernel_name": "lrn_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ScatterMax", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, 
{"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "updates", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "scatter_max.so", "compute_cost": 10, "kernel_name": "scatter_max", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ScatterMin", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "updates", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": 
"scatter_min.so", "compute_cost": 10, "kernel_name": "scatter_min", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ScatterSub", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "updates", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "scatter_sub.so", "compute_cost": 10, "kernel_name": "scatter_sub", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ScatterMul", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "updates", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": 
"optional", "type": "bool", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "scatter_mul.so", "compute_cost": 10, "kernel_name": "scatter_mul", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ScatterDiv", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "updates", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", 
"DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "scatter_div.so", "compute_cost": 10, "kernel_name": "scatter_div", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "Mod", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int8", ""], ["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""], ["uint8", ""]], [["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "mod.so", "compute_cost": 10, "kernel_name": "mod", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": "broadcast"} +{"op_name": "MaxPoolGradGrad", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "ksize", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "padding", "param_type": "required", "type": "str", "value": "all"}, {"name": "data_format", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]]], 
"imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool_grad_grad.so", "compute_cost": 10, "kernel_name": "max_pool_grad_grad", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "MaxPoolGradGradWithArgmax", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "argmax", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "ksize", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "padding", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["uint16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["int64", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool_grad_grad_with_argmax.so", "compute_cost": 10, "kernel_name": "max_pool_grad_grad_with_argmax", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "PopulationCount", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int16", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["int16", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint16", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "population_count.so", 
"compute_cost": 10, "kernel_name": "population_count", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} +{"op_name": "ParallelConcat", "inputs": [{"index": 0, "name": "values", "need_compile": false, "param_type": "dynamic", "shape": "all"}], "outputs": [{"index": 0, "name": "output_data", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "shape", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "N", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["bool", "NC1HWC0"], ["bool", "NC1HWC0"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int16", "NC1HWC0"], ["int16", "NC1HWC0"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint16", "NC1HWC0"], ["uint16", "NC1HWC0"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint32", "NC1HWC0"], ["uint32", "NC1HWC0"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["int64", "NC1HWC0"], ["int64", "NC1HWC0"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["uint64", "NC1HWC0"], ["uint64", "NC1HWC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["bool", "NHWC"], ["bool", "NHWC"]], [["bool", "NCHW"], ["bool", "NCHW"]], [["int8", "NHWC"], ["int8", "NHWC"]], [["int8", "NCHW"], ["int8", "NCHW"]], [["uint8", "NHWC"], ["uint8", "NHWC"]], [["uint8", "NCHW"], ["uint8", "NCHW"]], 
[["int16", "NHWC"], ["int16", "NHWC"]], [["int16", "NCHW"], ["int16", "NCHW"]], [["uint16", "NHWC"], ["uint16", "NHWC"]], [["uint16", "NCHW"], ["uint16", "NCHW"]], [["int32", "NHWC"], ["int32", "NHWC"]], [["int32", "NCHW"], ["int32", "NCHW"]], [["uint32", "NHWC"], ["uint32", "NHWC"]], [["uint32", "NCHW"], ["uint32", "NCHW"]], [["int64", "NHWC"], ["int64", "NHWC"]], [["int64", "NCHW"], ["int64", "NCHW"]], [["uint64", "NHWC"], ["uint64", "NHWC"]], [["uint64", "NCHW"], ["uint64", "NCHW"]], [["float16", "NHWC"], ["float16", "NHWC"]], [["float16", "NCHW"], ["float16", "NCHW"]], [["float32", "NHWC"], ["float32", "NHWC"]], [["float32", "NCHW"], ["float32", "NCHW"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "parallel_concat.so", "compute_cost": 10, "kernel_name": "parallel_concat", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "op_pattern": ""} diff --git a/graphengine b/graphengine index 4084909d62c..31aa96ef410 160000 --- a/graphengine +++ b/graphengine @@ -1 +1 @@ -Subproject commit 4084909d62c159da6ba316f61ad3d02a4857b34b +Subproject commit 31aa96ef41067a0ecdc4113ef245f8ede48f3457 diff --git a/include/ms_tensor.h b/include/ms_tensor.h index 1f9661df5e2..fc59e123283 100644 --- a/include/ms_tensor.h +++ b/include/ms_tensor.h @@ -20,7 +20,7 @@ #include #include #include -#include "ir/dtype/type_id.h" +#include "mindspore/core/ir/dtype/type_id.h" namespace mindspore { #define MS_API __attribute__((visibility("default"))) diff --git a/mindspore/_extends/parse/parser.py b/mindspore/_extends/parse/parser.py index a6043eb7879..9d715fdf539 100644 --- a/mindspore/_extends/parse/parser.py +++ b/mindspore/_extends/parse/parser.py @@ -334,7 +334,7 @@ class Parser: def __init__(self, fn: (types.FunctionType, types.MethodType), parse_method=None) -> None: self.fn = fn self.parse_method = parse_method - _, self.line_offset = inspect.getsourcelines(self.fn) + self.line_offset = 0 self.filename: str = inspect.getfile(self.fn) # Used to resolve the 
function's globals Namespace. @@ -350,7 +350,8 @@ class Parser: logger.debug("fn = %r", self.fn) tree = None if isinstance(self.fn, (types.FunctionType, types.MethodType)): - original_src = inspect.getsource(self.fn) + lines, self.line_offset = inspect.getsourcelines(self.fn) + original_src = ''.join(lines) hexstr = hashlib.sha256(original_src.encode()).hexdigest() tree = Parser.ast_cache.get(hexstr) if not tree: diff --git a/mindspore/_extends/parse/standard_method.py b/mindspore/_extends/parse/standard_method.py index 936099a4fbd..d70c6edcf47 100644 --- a/mindspore/_extends/parse/standard_method.py +++ b/mindspore/_extends/parse/standard_method.py @@ -108,7 +108,8 @@ def enumerate_(x, start=0): """Enumerate list or tuple.""" x_type = F.typeof(x) ret = () - if check_is_tuple_or_list(x_type, "enumerate"): + op_name = "enumerate" + if check_is_tuple_or_list(x_type, op_name, "first input") and check_is_const_int(start, op_name, "start"): ret = zip(range(start, start + len(x)), x) return ret @@ -123,11 +124,22 @@ def while_cond(x): @constexpr -def check_is_tuple_or_list(x, op_name): +def check_is_tuple_or_list(x, op_name, arg_name): """check whether x is list or tuple.""" if isinstance(x, (mstype.list_type, mstype.tuple_type)): return True - raise TypeError(f"For '{op_name}', the input parameter should be tuple or list, but got {x}.") + raise TypeError(f"For '{op_name}', the '{arg_name}' should be tuple or list, but got {x}.") + + +@constexpr +def check_is_const_int(x, op_name, arg_name): + """check whether x is const int.""" + if x is None: + raise TypeError(f"For '{op_name}', the '{arg_name}' should be a const int number, but got not const.") + if not isinstance(x, int): + raise TypeError(f"For '{op_name}', the '{arg_name}' should be a const int number, but got {x}.") + return True + @constexpr def check_is_tensor_bool_cond(shp): diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt index 9dc1502aa5c..bb02f338f64 100644 --- 
a/mindspore/ccsrc/CMakeLists.txt +++ b/mindspore/ccsrc/CMakeLists.txt @@ -1,4 +1,5 @@ ## common setting +include_directories(${CMAKE_SOURCE_DIR}/mindspore/core) include_directories(${CMAKE_CURRENT_SOURCE_DIR}) include_directories(${CMAKE_BINARY_DIR}) link_directories(${CMAKE_SOURCE_DIR}/build/mindspore/graphengine) @@ -35,20 +36,20 @@ if(ENABLE_GPU) include_directories(${CUDNN_PATH} ${CUDA_PATH} ${CUDA_INCLUDE_DIRS}) file(GLOB_RECURSE GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "device/gpu/*.cc" - "device/gpu/*.cu" - "kernel/gpu/*.cu" - "kernel/akg/gpu/*.cc" - "kernel/akg/akg_kernel_build.cc" - "kernel/akg/akg_kernel_attrs_process.cc" + "runtime/device/gpu/*.cc" + "runtime/device/gpu/*.cu" + "backend/kernel_compiler/gpu/*.cu" + "backend/kernel_compiler/akg/gpu/*.cc" + "backend/kernel_compiler/akg/akg_kernel_build.cc" + "backend/kernel_compiler/akg/akg_kernel_attrs_process.cc" ) list(APPEND CUDA_NVCC_FLAGS -arch=sm_53) - list(REMOVE_ITEM GPU_SRC_LIST "device/gpu/blocking_queue.cc" "device/gpu/gpu_buffer_mgr.cc") - list(REMOVE_ITEM GPU_SRC_LIST "device/gpu/mpi/mpi_initializer.cc" - "device/gpu/distribution/collective_wrapper.cc" - "device/gpu/distribution/mpi_wrapper.cc" - "device/gpu/distribution/nccl_wrapper.cc" + list(REMOVE_ITEM GPU_SRC_LIST "runtime/device/gpu/blocking_queue.cc" "runtime/device/gpu/gpu_buffer_mgr.cc") + list(REMOVE_ITEM GPU_SRC_LIST "runtime/device/gpu/mpi/mpi_initializer.cc" + "runtime/device/gpu/distribution/collective_wrapper.cc" + "runtime/device/gpu/distribution/mpi_wrapper.cc" + "runtime/device/gpu/distribution/nccl_wrapper.cc" ) set(NVCC_TMP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) @@ -56,6 +57,7 @@ if(ENABLE_GPU) set_property(SOURCE ${GPU_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE) cuda_add_library(gpu_cuda_lib STATIC ${GPU_SRC_LIST}) set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS}) + add_compile_definitions(ENABLE_GPU) endif () ## make flatuffer files @@ -101,16 +103,20 @@ if 
(ENABLE_DUMP_PROTO) endif () if (ENABLE_D) - include_directories("${CMAKE_BINARY_DIR}/kernel/aicpu") + include_directories("${CMAKE_BINARY_DIR}/backend/kernel_compiler/aicpu") include_directories("${CMAKE_BINARY_DIR}/predict/generator/ir") - file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "kernel/aicpu/proto/*.proto") + file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "backend/kernel_compiler/aicpu/proto/*.proto") ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN}) file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/proto/*.proto") ms_protobuf_generate(PREDICT_PROTOSRCS PREDICT_PROTOHDRS ${PROTO_INNER}) + file(GLOB_RECURSE PROTO_DUMP RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "runtime/device/ascend/dump/proto/*.proto") + ms_protobuf_generate(DUMP_PROTOSRCS PROTOHDRS ${PROTO_DUMP}) + list(APPEND MINDSPORE_PROTO_LIST ${PROTOSRCS}) list(APPEND MINDSPORE_PROTO_LIST ${PREDICT_PROTOSRCS}) + list(APPEND MINDSPORE_PROTO_LIST ${DUMP_PROTOSRCS}) add_compile_definitions(ENABLE_D) endif () @@ -121,18 +127,36 @@ if (MINDSPORE_PROTO_LIST) endif() ## make sub objects -set(SUB_COMP - transform pre_activate parallel pipeline device kernel common debug gvar ir onnx operator optimizer predict - pybind_api pynative session utils vm +set(SUB_COMP + transform/graph_ir + transform/onnx + backend/optimizer + backend/kernel_compiler + backend/session + runtime/device + frontend/optimizer + frontend/parallel + frontend/operator + pipeline/jit + pipeline/pynative + common debug gvar predict pybind_api utils vm ) foreach (_comp ${SUB_COMP}) add_subdirectory(${_comp}) - if (TARGET _mindspore_${_comp}_obj) - list(APPEND SUB_OBJECTS_SRC $) - add_dependencies(_mindspore_${_comp}_obj proto_input flat_input) + string(REPLACE "/" "_" sub ${_comp}) + if (TARGET _mindspore_${sub}_obj) + list(APPEND SUB_OBJECTS_SRC $) + add_dependencies(_mindspore_${sub}_obj proto_input flat_input) endif () endforeach () 
+add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/base base) +list(APPEND SUB_OBJECTS_SRC $) +add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/abstract abstract) +list(APPEND SUB_OBJECTS_SRC $) +add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/ir ir) +list(APPEND SUB_OBJECTS_SRC $) +add_dependencies(_mindspore_base_obj _mindspore_ir_obj _mindspore_abstract_obj proto_input flat_input) set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME) add_library(mindspore STATIC ${SUB_OBJECTS_SRC}) @@ -204,8 +228,8 @@ endif() # set c_expression building set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) -set_property(SOURCE "pipeline/init.cc" PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE) -pybind11_add_module(_c_expression "pipeline/init.cc") +set_property(SOURCE "pipeline/jit/init.cc" PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE) +pybind11_add_module(_c_expression "pipeline/jit/init.cc") MESSAGE(STATUS "operation system is ${CMAKE_SYSTEM}") if (CMAKE_SYSTEM_NAME MATCHES "Linux") @@ -231,9 +255,11 @@ else () target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore -Wl,--no-whole-archive) target_link_libraries(_c_expression PRIVATE mindspore::pybind11_module) target_link_libraries(_c_expression PRIVATE mindspore_gvar) - target_link_libraries(_c_expression PRIVATE mindspore::pslite mindspore::protobuf ${zeromq_DIRPATH}/zmq_install/lib/libzmq.a) - if (${ENABLE_IBVERBS} STREQUAL "ON") - target_link_libraries(_c_expression PRIVATE ibverbs rdmacm) + if (NOT ENABLE_GE) + target_link_libraries(_c_expression PRIVATE mindspore::pslite mindspore::protobuf ${zeromq_DIRPATH}/zmq_install/lib/libzmq.a) + if (${ENABLE_IBVERBS} STREQUAL "ON") + target_link_libraries(_c_expression PRIVATE ibverbs rdmacm) + endif() endif() endif () @@ -260,8 +286,8 @@ if (ENABLE_CPU) endif () if (ENABLE_MINDDATA) - add_subdirectory(mindrecord) - add_subdirectory(dataset) + 
add_subdirectory(minddata/mindrecord) + add_subdirectory(minddata/dataset) endif () # build inference @@ -270,7 +296,7 @@ set(LOAD_ONNX_SRC ${CMAKE_CURRENT_SOURCE_DIR}/utils/load_onnx/anf_model_parser.cc ) add_library(inference SHARED - ${CMAKE_CURRENT_SOURCE_DIR}/session/session.cc + ${CMAKE_CURRENT_SOURCE_DIR}/backend/session/session.cc ${LOAD_ONNX_SRC} ) target_link_libraries(inference PRIVATE ${PYTHON_LIBRARIES} ${SECUREC_LIBRARY} diff --git a/mindspore/ccsrc/kernel/CMakeLists.txt b/mindspore/ccsrc/backend/kernel_compiler/CMakeLists.txt similarity index 73% rename from mindspore/ccsrc/kernel/CMakeLists.txt rename to mindspore/ccsrc/backend/kernel_compiler/CMakeLists.txt index ceea6b1a990..b412d83d116 100644 --- a/mindspore/ccsrc/kernel/CMakeLists.txt +++ b/mindspore/ccsrc/backend/kernel_compiler/CMakeLists.txt @@ -25,7 +25,15 @@ if (ENABLE_CPU) file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "cpu/*.cc" ) - + + list(REMOVE_ITEM CPU_SRC_LIST "cpu/ps/push_kernel.cc" + "cpu/ps/pull_kernel.cc" + "cpu/ps/embedding_look_up_ps_kernel.cc" + "cpu/ps/embedding_look_up_proxy_kernel.cc" + "cpu/ps/apply_momentum_ps_kernel.cc" + "cpu/ps/sparse_apply_adam_ps_kernel.cc" + "cpu/ps/sparse_apply_ftrl_ps_kernel.cc") + if (NOT ENABLE_MPI) list(REMOVE_ITEM CPU_SRC_LIST "cpu/allgather_cpu_kernel.cc") list(REMOVE_ITEM CPU_SRC_LIST "cpu/reduce_scatter_cpu_kernel.cc") @@ -55,4 +63,4 @@ endif() set_property(SOURCE ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_KERNEL) -add_library(_mindspore_kernel_obj OBJECT ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST}) +add_library(_mindspore_backend_kernel_compiler_obj OBJECT ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST}) diff --git a/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.cc new file mode 100644 index 
00000000000..7e7fd20f397 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.cc @@ -0,0 +1,312 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/aicpu/aicpu_kernel_build.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include "runtime/device/kernel_runtime.h" +#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h" +#include "backend/kernel_compiler/akg/akg_kernel_build.h" +#include "proto/tensor.pb.h" +#include "proto/tensor_shape.pb.h" +#include "proto/attr.pb.h" +#include "proto/node_def.pb.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "common/utils.h" +#include "backend/kernel_compiler/aicpu/aicpu_util.h" +#include "backend/session/kernel_graph.h" +#include "backend/kernel_compiler/common_utils.h" + +namespace mindspore { +namespace kernel { +using FNodeAttrHandle = std::function &anf_node, mindspore::NodeDef *proto)>; + +bool SetIOIputSize(const std::shared_ptr &anf_node, const size_t &input_num, + std::vector *input_size_list) { + MS_EXCEPTION_IF_NULL(anf_node); + MS_EXCEPTION_IF_NULL(input_size_list); + for (size_t i = 0; i < input_num; i++) { + std::vector shape_i = AnfAlgo::GetInputDeviceShape(anf_node, i); + if (AnfAlgo::GetInputDeviceDataType(anf_node, i) == kObjectTypeString) { + if (!anf_node->isa()) { + MS_LOG(EXCEPTION) << "anf_node is not CNode."; + } + auto 
cnode = anf_node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (cnode->inputs().size() < (i + 1)) { + MS_LOG(ERROR) << "cnode inputs size " << cnode->inputs().size() << " is smaller than " << i + 1; + return false; + } + auto input_node = cnode->inputs()[i + 1]; + MS_EXCEPTION_IF_NULL(input_node); + if (input_node->isa()) { + auto value_ptr = GetValueNode(input_node); + auto value = GetValue(value_ptr); + input_size_list->push_back(value.size()); + } + } else { + auto type_ptr = TypeIdToType(AnfAlgo::GetInputDeviceDataType(anf_node, i)); + MS_EXCEPTION_IF_NULL(type_ptr); + int64_t size_i = 1; + for (size_t j = 0; j < shape_i.size(); j++) { + size_i = LongMulWithOverflowCheck(size_i, static_cast(shape_i[j])); + } + size_t type_byte = GetTypeByte(type_ptr); + if (type_byte == 0) { + return false; + } + size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte)); + input_size_list->push_back(LongToSize(size_i)); + } + } + return true; +} + +bool SetIOSize(const std::shared_ptr &anf_node, const std::shared_ptr &kernel_mod_ptr) { + MS_EXCEPTION_IF_NULL(anf_node); + MS_EXCEPTION_IF_NULL(kernel_mod_ptr); + std::vector input_size_list; + std::vector output_size_list; + size_t input_num = AnfAlgo::GetInputTensorNum(anf_node); + size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node); + + if (!SetIOIputSize(anf_node, input_num, &input_size_list)) { + return false; + } + kernel_mod_ptr->SetInputSizeList(input_size_list); + + for (size_t i = 0; i < output_num; i++) { + std::vector shape_i = AnfAlgo::GetOutputDeviceShape(anf_node, i); + TypePtr type_ptr = TypeIdToType(AnfAlgo::GetOutputDeviceDataType(anf_node, i)); + MS_EXCEPTION_IF_NULL(type_ptr); + int64_t size_i = 1; + for (size_t j = 0; j < shape_i.size(); j++) { + size_i = LongMulWithOverflowCheck(size_i, static_cast(shape_i[j])); + } + size_t type_byte = GetTypeByte(type_ptr); + if (type_byte == 0) { + return false; + } + size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte)); + 
output_size_list.push_back(LongToSize(size_i)); + } + kernel_mod_ptr->SetOutputSizeList(output_size_list); + return true; +} + +void ParseAttrValue(const std::string &type, const std::string &attr_name, const mindspore::ValuePtr &value, + ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr) { + MS_EXCEPTION_IF_NULL(node_attr); + MS_EXCEPTION_IF_NULL(value); + if (type == "int") { + auto attr_value = GetValue(value); + (*node_attr)[attr_name].set_i(attr_value); + } else if (type == "str") { + auto attr_value = GetValue(value); + (*node_attr)[attr_name].set_s(attr_value); + } else if (type == "bool") { + auto attr_value = GetValue(value); + (*node_attr)[attr_name].set_b(attr_value); + } else if (type == "float") { + auto attr_value = GetValue(value); + (*node_attr)[attr_name].set_f(attr_value); + } else if (type == "listInt") { + std::vector attr_value; + auto value_type = value->type(); + MS_EXCEPTION_IF_NULL(value_type); + auto value_type_str = value_type->ToString(); + if (value_type_str == "Int32") { + int data = GetValue(value); + attr_value.push_back(data); + } else { + attr_value = GetValue>(value); + } + mindspore::AttrValue input_shape_attr; + mindspore::AttrValue_ArrayValue *input_shape_attr_list = input_shape_attr.mutable_array(); + MS_EXCEPTION_IF_NULL(input_shape_attr_list); + for (const auto shape : attr_value) { + input_shape_attr_list->add_i(shape); + } + (*node_attr)[attr_name] = input_shape_attr; + } else { + MS_LOG(EXCEPTION) << "type: " << type << "not support"; + } +} + +void SetNodeAttr(const std::shared_ptr &anf_node, mindspore::NodeDef *proto) { + MS_EXCEPTION_IF_NULL(anf_node); + MS_EXCEPTION_IF_NULL(proto); + std::string op_name = AnfAlgo::GetCNodeName(anf_node); + if (op_name == kInitDataSetQueue) { + op_name = kInitData; + } + if (op_name == kPrint) { + return; + } + + auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAICPU); + MS_EXCEPTION_IF_NULL(op_info_ptr); + auto attrs_ptr = 
op_info_ptr->attrs_ptr(); + auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); + MS_EXCEPTION_IF_NULL(primitive); + ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr = proto->mutable_attrs(); + for (const auto &attr_ptr : attrs_ptr) { + MS_EXCEPTION_IF_NULL(attr_ptr); + std::string attr_name = attr_ptr->name(); + auto value = primitive->GetAttr(attr_name); + if (value != nullptr) { + if (attr_name == kQueueName || attr_name == kSharedName) { + attr_name = kChannelName; + } else if (attr_name == kSeed0) { + attr_name = kSeed; + } else if (attr_name == kSeed1) { + attr_name = kSeed2; + } + std::string type = attr_ptr->type(); + ParseAttrValue(type, attr_name, value, node_attr); + } + } + MS_LOG(INFO) << "Set node attr end!"; +} + +void SetNodeInputs(const std::shared_ptr &anf_node, mindspore::NodeDef *proto) { + MS_EXCEPTION_IF_NULL(proto); + MS_EXCEPTION_IF_NULL(anf_node); + size_t input_num = AnfAlgo::GetInputTensorNum(anf_node); + if (input_num == 0) { + MS_LOG(INFO) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] does not have input."; + return; + } + + for (size_t input_index = 0; input_index < input_num; input_index++) { + ::mindspore::Tensor *node_inputs = proto->add_inputs(); + MS_EXCEPTION_IF_NULL(node_inputs); + TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index); + std::vector input_shape; + int32_t input_data_type; + if (input_type == kObjectTypeString) { + auto cnode = anf_node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto input_node = cnode->inputs()[input_index + 1]; + auto value_ptr = GetValueNode(input_node); + auto value = GetValue(value_ptr); + input_shape.push_back(1); + input_shape.push_back(value.size()); + input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown); + } else { + input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index); + input_data_type = AicpuOpUtil::MsTypeToProtoType(input_type); + } + + mindspore::TensorShape *tensorShape = 
node_inputs->mutable_tensor_shape(); + for (auto item : input_shape) { + mindspore::TensorShape_Dim *dim = tensorShape->add_dim(); + dim->set_size((::google::protobuf::int64)item); + } + node_inputs->set_tensor_type((mindspore::DataType)input_data_type); + node_inputs->set_mem_device("HBM"); + } +} + +void SetNodeOutputs(const std::shared_ptr &anf_node, mindspore::NodeDef *proto) { + MS_EXCEPTION_IF_NULL(proto); + MS_EXCEPTION_IF_NULL(anf_node); + size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node); + if (output_num == 0) { + MS_LOG(INFO) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] does not have output. "; + return; + } + + for (size_t output_index = 0; output_index < output_num; output_index++) { + ::mindspore::Tensor *node_outputs = proto->add_outputs(); + MS_EXCEPTION_IF_NULL(node_outputs); + std::vector output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index); + mindspore::TensorShape *tensorShape = node_outputs->mutable_tensor_shape(); + MS_EXCEPTION_IF_NULL(tensorShape); + for (auto item : output_shape) { + mindspore::TensorShape_Dim *dim = tensorShape->add_dim(); + MS_EXCEPTION_IF_NULL(dim); + dim->set_size((::google::protobuf::int64)item); + } + TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index); + int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type); + node_outputs->set_tensor_type((mindspore::DataType)output_data_type); + node_outputs->set_mem_device("HBM"); + } +} + +void SetNodedefProto(const std::shared_ptr &anf_node, mindspore::NodeDef *proto) { + MS_EXCEPTION_IF_NULL(anf_node); + MS_EXCEPTION_IF_NULL(proto); + MS_LOG(INFO) << "SetNodedefProto entry"; + std::string op_name = AnfAlgo::GetCNodeName(anf_node); + if (op_name == kInitDataSetQueue) { + op_name = kInitData; + } + // set op name + proto->set_op(op_name); + // set inputs tensor + SetNodeInputs(anf_node, proto); + // set outputs tensor + SetNodeOutputs(anf_node, proto); + // set node attr + SetNodeAttr(anf_node, proto); + 
MS_LOG(INFO) << "SetNodedefProto end!"; +} + +bool CreateNodeDefBytes(const std::shared_ptr &anf_node, + const std::shared_ptr &kernel_mod_ptr) { + MS_EXCEPTION_IF_NULL(kernel_mod_ptr); + MS_EXCEPTION_IF_NULL(anf_node); + MS_LOG(INFO) << "CreateNodeDefBytes entry"; + + mindspore::NodeDef proto; + SetNodedefProto(anf_node, &proto); + std::string nodeDefStr; + if (!proto.SerializeToString(&nodeDefStr)) { + MS_LOG(ERROR) << "Serialize nodeDef to string failed."; + return false; + } + kernel_mod_ptr->SetNodeDef(nodeDefStr); + MS_LOG(INFO) << "CreateNodeDefBytes end!"; + return true; +} + +KernelModPtr AicpuOpBuild(const std::shared_ptr &anf_node) { + MS_EXCEPTION_IF_NULL(anf_node); + std::string op_name = AnfAlgo::GetCNodeName(anf_node); + if (op_name == kInitDataSetQueue) { + op_name = kInitData; + } + auto kernel_mod_ptr = std::make_shared(); + MS_EXCEPTION_IF_NULL(kernel_mod_ptr); + kernel_mod_ptr->SetAnfNode(anf_node); + kernel_mod_ptr->SetNodeName(op_name); + if (!CreateNodeDefBytes(anf_node, kernel_mod_ptr)) { + MS_LOG(EXCEPTION) << "Create nodeDefBytes faild!"; + } + if (!SetIOSize(anf_node, kernel_mod_ptr)) { + MS_LOG(EXCEPTION) << "Set input output size list failed."; + } + return kernel_mod_ptr; +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.h b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.h similarity index 95% rename from mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.h rename to mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.h index a3c24ae49e9..6e2ee3959b7 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_BUILD_H_ #define MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_BUILD_H_ #include -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { 
namespace kernel { diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_metadata.cc b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_metadata.cc similarity index 91% rename from mindspore/ccsrc/kernel/aicpu/aicpu_kernel_metadata.cc rename to mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_metadata.cc index 3670a2d76f7..76c29b9f5c9 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_metadata.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_metadata.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "kernel/aicpu/aicpu_kernel_metadata.h" +#include "backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h" #include #include -#include "kernel/oplib/oplib.h" -#include "kernel/common_utils.h" -#include "kernel/aicpu/aicpu_util.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/kernel_compiler/common_utils.h" +#include "backend/kernel_compiler/aicpu/aicpu_util.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_metadata.h b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h similarity index 95% rename from mindspore/ccsrc/kernel/aicpu/aicpu_kernel_metadata.h rename to mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h index 74e667856e5..e21f4eace44 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_metadata.h +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h @@ -20,7 +20,7 @@ #include #include #include -#include "kernel/kernel_build_info.h" +#include "backend/kernel_compiler/kernel_build_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.cc b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.cc similarity index 95% rename from mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.cc rename to 
mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.cc index 2213f176cc4..e18b3169f31 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/aicpu/aicpu_kernel_mod.h" +#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h" #include #include @@ -23,9 +23,10 @@ #include "runtime/mem.h" #include "runtime/rt.h" -#include "kernel/aicpu/aicpu_kernel_build.h" +#include "backend/kernel_compiler/aicpu/aicpu_kernel_build.h" #include "utils/convert_utils.h" -#include "kernel/aicpu/aicpu_util.h" +#include "backend/kernel_compiler/aicpu/aicpu_util.h" +#include "utils/context/ms_context.h" using AicpuTaskInfoPtr = std::shared_ptr; @@ -144,8 +145,9 @@ std::vector AicpuOpKernelMod::GenTask(const std::vector if (node_name_ == kTopK) { node_name_ = kTopKV2; } + AicpuTaskInfoPtr task_info_ptr = make_shared( - stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs); + kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump()); MS_LOG(INFO) << "AicpuOpKernelMod GenTask end"; return {task_info_ptr}; diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.h b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.h similarity index 96% rename from mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.h rename to mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.h index 3ee9bd2a15f..82260010ea9 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.h +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.h @@ -18,8 +18,8 @@ #include #include #include -#include "kernel/ascend_kernel_mod.h" -#include "kernel/aicpu/aicpu_util.h" +#include "backend/kernel_compiler/ascend_kernel_mod.h" +#include "backend/kernel_compiler/aicpu/aicpu_util.h" namespace mindspore { namespace kernel { class AicpuOpKernelMod : public 
AscendKernelMod { diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_util.cc b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.cc similarity index 95% rename from mindspore/ccsrc/kernel/aicpu/aicpu_util.cc rename to mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.cc index a617f56f8f0..790319daa6d 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_util.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "kernel/aicpu/aicpu_util.h" +#include "backend/kernel_compiler/aicpu/aicpu_util.h" #include #include #include "proto/types.pb.h" #include "runtime/mem.h" #include "runtime/rt.h" #include "utils/convert_utils.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.h b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.h new file mode 100644 index 00000000000..fd4495afebb --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.h @@ -0,0 +1,64 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_UTIL_H_ +#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_UTIL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/kernel.h" + +namespace mindspore { +namespace kernel { +constexpr auto kInitDataSetQueue = "InitDataSetQueue"; +constexpr auto kInitData = "InitData"; +constexpr auto kGetNext = "GetNext"; +constexpr auto kPrint = "Print"; +constexpr auto kPack = "Pack"; +constexpr auto kOutputTypes = "output_types"; +constexpr auto kOutputShapes = "output_shapes"; +constexpr auto kChannelName = "channel_name"; +constexpr auto kSharedName = "shared_name"; +constexpr auto kShapes = "shapes"; +constexpr auto kTypes = "types"; +constexpr auto kQueueName = "queue_name"; +constexpr auto kSeed = "seed"; +constexpr auto kSeed0 = "Seed0"; +constexpr auto kSeed1 = "Seed1"; +constexpr auto kSeed2 = "seed2"; +constexpr auto kTopK = "TopK"; +constexpr auto kTopKV2 = "TopKV2"; + +struct AicpuParamHead { + uint32_t length; // Total length: include cunstom message + uint32_t ioAddrNum; // Input and output address number + uint32_t extInfoLength; // extInfo struct Length + uint64_t extInfoAddr; // extInfo address +} __attribute__((packed)); + +class AicpuOpUtil { + public: + static int MsTypeToProtoType(TypeId ms_type); + + private: + // kernel id + static uint64_t KernelId_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_UTIL_H_ diff --git a/mindspore/ccsrc/kernel/aicpu/proto/attr.proto b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/attr.proto similarity index 100% rename from mindspore/ccsrc/kernel/aicpu/proto/attr.proto rename to mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/attr.proto diff --git a/mindspore/ccsrc/kernel/aicpu/proto/node_def.proto b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/node_def.proto similarity index 100% rename from mindspore/ccsrc/kernel/aicpu/proto/node_def.proto rename to 
mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/node_def.proto diff --git a/mindspore/ccsrc/kernel/aicpu/proto/tensor.proto b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor.proto similarity index 100% rename from mindspore/ccsrc/kernel/aicpu/proto/tensor.proto rename to mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor.proto diff --git a/mindspore/ccsrc/kernel/aicpu/proto/tensor_shape.proto b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor_shape.proto similarity index 100% rename from mindspore/ccsrc/kernel/aicpu/proto/tensor_shape.proto rename to mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor_shape.proto diff --git a/mindspore/ccsrc/kernel/aicpu/proto/types.proto b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/types.proto similarity index 100% rename from mindspore/ccsrc/kernel/aicpu/proto/types.proto rename to mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/types.proto diff --git a/mindspore/ccsrc/kernel/akg/akg_kernel_attrs_process.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_attrs_process.cc similarity index 98% rename from mindspore/ccsrc/kernel/akg/akg_kernel_attrs_process.cc rename to mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_attrs_process.cc index 018fbe4f2aa..73fdb5c11b5 100644 --- a/mindspore/ccsrc/kernel/akg/akg_kernel_attrs_process.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_attrs_process.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/akg/akg_kernel_attrs_process.h" +#include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h" #include -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/akg/akg_kernel_attrs_process.h b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_attrs_process.h similarity index 98% rename from mindspore/ccsrc/kernel/akg/akg_kernel_attrs_process.h rename to mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_attrs_process.h index 9d15d4f9e9b..9ba724db42a 100644 --- a/mindspore/ccsrc/kernel/akg/akg_kernel_attrs_process.h +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_attrs_process.h @@ -22,7 +22,7 @@ #include #include "ir/anf.h" #include "utils/utils.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/akg/akg_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc similarity index 99% rename from mindspore/ccsrc/kernel/akg/akg_kernel_build.cc rename to mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc index 0e8d93d47f8..9c13629b1bd 100644 --- a/mindspore/ccsrc/kernel/akg/akg_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/akg/akg_kernel_build.h" +#include "backend/kernel_compiler/akg/akg_kernel_build.h" #include #include #include @@ -35,8 +35,8 @@ #include "utils/convert_utils.h" #include "utils/any.h" #include "utils/utils.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/akg/akg_kernel_attrs_process.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/akg/akg_kernel_build.h b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h similarity index 95% rename from mindspore/ccsrc/kernel/akg/akg_kernel_build.h rename to mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h index 15fa03f45bb..7b6a2f0b862 100644 --- a/mindspore/ccsrc/kernel/akg/akg_kernel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h @@ -22,11 +22,11 @@ #include #include #include -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" #include "ir/dtype.h" #include -#include "kernel/common_utils.h" -#include "kernel/oplib/oplib.h" +#include "backend/kernel_compiler/common_utils.h" +#include "backend/kernel_compiler/oplib/oplib.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/akg/akg_kernel_metadata.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_metadata.cc similarity index 88% rename from mindspore/ccsrc/kernel/akg/akg_kernel_metadata.cc rename to mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_metadata.cc index 3515add1e09..f3567428d35 100644 --- a/mindspore/ccsrc/kernel/akg/akg_kernel_metadata.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_metadata.cc @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#include "kernel/akg/akg_kernel_metadata.h" +#include "backend/kernel_compiler/akg/akg_kernel_metadata.h" #include -#include "session/anf_runtime_algorithm.h" -#include "kernel/oplib/oplib.h" -#include "kernel/common_utils.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/kernel_compiler/common_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/akg/akg_kernel_metadata.h b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_metadata.h similarity index 95% rename from mindspore/ccsrc/kernel/akg/akg_kernel_metadata.h rename to mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_metadata.h index 5e329f0080b..02785c6cdbd 100644 --- a/mindspore/ccsrc/kernel/akg/akg_kernel_metadata.h +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_metadata.h @@ -21,7 +21,7 @@ #include #include #include -#include "kernel/kernel_build_info.h" +#include "backend/kernel_compiler/kernel_build_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.cc similarity index 97% rename from mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.cc rename to mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.cc index 7200a91ac0a..d698c89bc94 100644 --- a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/akg/ascend/akg_ascend_kernel_build.h" +#include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.h" #include #include @@ -26,12 +26,12 @@ #include #include "ir/dtype.h" #include "ir/func_graph.h" -#include "kernel/kernel.h" -#include "kernel/common_utils.h" -#include "kernel/tbe/tbe_utils.h" -#include "kernel/akg/ascend/akg_ascend_kernel_mod.h" -#include "kernel/akg/akg_kernel_attrs_process.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/common_utils.h" +#include "backend/kernel_compiler/tbe/tbe_utils.h" +#include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h" +#include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.h b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.h similarity index 95% rename from mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.h rename to mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.h index 01752911ed0..713b65a4515 100644 --- a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.h @@ -22,8 +22,8 @@ #include #include #include "ir/anf.h" -#include "kernel/kernel.h" -#include "kernel/akg/akg_kernel_build.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/akg/akg_kernel_build.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.cc similarity index 94% rename from mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc rename to mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.cc index 
69fc82aad39..8bb4940778f 100644 --- a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/akg/ascend/akg_ascend_kernel_mod.h" +#include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h" #include #include #include @@ -26,6 +26,7 @@ #include "runtime/rt.h" #include "utils/log_adapter.h" #include "utils/convert_utils.h" +#include "utils/context/ms_context.h" namespace mindspore { namespace kernel { @@ -123,8 +124,8 @@ std::vector AkgKernelMod::GenTask(const std::vector &in MS_LOG(DEBUG) << "The block_dim is:" << block_dim; TbeTaskInfoPtr task_info_ptr = make_shared( - stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data, input_data_addrs, - output_data_addrs, workspace_addrs); + kernel_name_, stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data, + input_data_addrs, output_data_addrs, workspace_addrs, NeedDump()); return {task_info_ptr}; } } // namespace kernel diff --git a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.h b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h similarity index 95% rename from mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.h rename to mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h index 18d342f6299..3ea36f1a23e 100644 --- a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.h +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h @@ -19,8 +19,8 @@ #include #include #include -#include "kernel/ascend_kernel_mod.h" -#include "kernel/tbe/tbe_utils.h" +#include "backend/kernel_compiler/ascend_kernel_mod.h" +#include "backend/kernel_compiler/tbe/tbe_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_build.cc 
b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.cc similarity index 85% rename from mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_build.cc rename to mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.cc index 534e355802c..96fcd1869e4 100644 --- a/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.cc @@ -14,12 +14,12 @@ * limitations under the License. */ -#include "kernel/akg/gpu/akg_gpu_kernel_build.h" +#include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.h" #include #include -#include "kernel/kernel.h" -#include "kernel/akg/akg_kernel_build.h" -#include "kernel/akg/gpu/akg_gpu_kernel_mod.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/akg/akg_kernel_build.h" +#include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_build.h b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.h similarity index 93% rename from mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_build.h rename to mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.h index 3a1145140f8..abb6d1f0300 100644 --- a/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.h @@ -16,8 +16,8 @@ #ifndef MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_ #define MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_ -#include "kernel/kernel.h" -#include "ir/base.h" +#include "backend/kernel_compiler/kernel.h" +#include "base/base.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_mod.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.cc similarity index 98% rename from mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_mod.cc rename to 
mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.cc index 64590cd9b82..d527f8ec761 100644 --- a/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_mod.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/akg/gpu/akg_gpu_kernel_mod.h" +#include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h" #include #include #include "nlohmann/json.hpp" diff --git a/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_mod.h b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h similarity index 98% rename from mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_mod.h rename to mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h index df9cb069f79..a6a17d033f2 100644 --- a/mindspore/ccsrc/kernel/akg/gpu/akg_gpu_kernel_mod.h +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h @@ -21,7 +21,7 @@ #include #include #include -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/ascend_kernel_mod.h b/mindspore/ccsrc/backend/kernel_compiler/ascend_kernel_mod.h similarity index 84% rename from mindspore/ccsrc/kernel/ascend_kernel_mod.h rename to mindspore/ccsrc/backend/kernel_compiler/ascend_kernel_mod.h index 0aee881f7d6..c6398eda9e7 100644 --- a/mindspore/ccsrc/kernel/ascend_kernel_mod.h +++ b/mindspore/ccsrc/backend/kernel_compiler/ascend_kernel_mod.h @@ -20,7 +20,10 @@ #include #include #include "framework/ge_runtime/task_info.h" -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" +#ifdef ENABLE_DATA_DUMP +#include "debug/data_dump_parser.h" +#endif using TaskInfoPtr = std::shared_ptr; namespace mindspore { @@ -31,6 +34,13 @@ class AscendKernelMod : public KernelMod { const std::vector &, uint32_t) = 0; uint32_t block_dim() { return block_dim_; } uint32_t stream_id() { return stream_id_; } + virtual bool NeedDump() 
{ +#ifdef ENABLE_DATA_DUMP + return DataDumpParser::GetInstance().NeedDump(kernel_name_); +#else + return false; +#endif + } protected: uint32_t block_dim_{1}; diff --git a/mindspore/ccsrc/kernel/common_utils.cc b/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc similarity index 83% rename from mindspore/ccsrc/kernel/common_utils.cc rename to mindspore/ccsrc/backend/kernel_compiler/common_utils.cc index ab4f59e5495..f4495cdb9df 100644 --- a/mindspore/ccsrc/kernel/common_utils.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/common_utils.h" +#include "backend/kernel_compiler/common_utils.h" #include #include #include @@ -22,16 +22,18 @@ #include #include #include "nlohmann/json.hpp" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" #include "ir/manager.h" #include "ir/meta_tensor.h" #include "ir/func_graph.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "utils/graph_utils.h" namespace mindspore { namespace kernel { +constexpr char kAxis[] = "axis"; +constexpr char kTypeInt32[] = "Int32"; const std::unordered_map type_id_maps = { {"float", TypeId::kNumberTypeFloat32}, {"float16", TypeId::kNumberTypeFloat16}, {"float32", TypeId::kNumberTypeFloat32}, {"float64", TypeId::kNumberTypeFloat64}, @@ -579,8 +581,40 @@ void WorkerForReduceSparseGradient(WorkerParamsForReduceSparseGradient param) { } } +void RunMultiThreadReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, + size_t outer_dim, std::vector> *sorted_indices, + std::vector *slice_positions) { + MS_LOG(DEBUG) << "Start"; + size_t thread_num = 24; + if (slice_positions->size() < thread_num) { + thread_num = slice_positions->size(); + } + size_t stride = (slice_positions->size() + thread_num - 1) / thread_num; + thread_num = (slice_positions->size() + stride - 1) / stride; + std::vector 
threads; + size_t max_length = sorted_indices->size() * outer_dim; + for (size_t i = 0; i < thread_num; ++i) { + size_t slice_start = i * stride; + size_t slice_end = 0; + if (i == thread_num - 1) { + slice_end = slice_positions->size(); + } else { + slice_end = slice_start + stride; + } + WorkerParamsForReduceSparseGradient params{ + slice_start, slice_end, max_length, outer_dim, sorted_indices, slice_positions, origin_sparse_grad.value_, + unique_grad}; + threads.emplace_back(std::thread(WorkerForReduceSparseGradient, params)); + } + for (size_t i = 0; i < thread_num; ++i) { + threads[i].join(); + } + MS_LOG(DEBUG) << "End"; +} + void ReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim, - size_t outer_dim) { + size_t outer_dim, bool use_multi_threads) { + MS_LOG(DEBUG) << "Start"; MS_EXCEPTION_IF_NULL(origin_sparse_grad.value_); MS_EXCEPTION_IF_NULL(origin_sparse_grad.indices_); MS_EXCEPTION_IF_NULL(unique_grad); @@ -599,37 +633,102 @@ void ReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradie [](const std::pair &left, const std::pair &right) { return left.first < right.first; }); int last_index = 0; std::vector slice_positions; + slice_positions.reserve(sorted_indices.size()); for (size_t i = 0; i < sorted_indices.size(); ++i) { if (i == 0 || last_index != sorted_indices[i].first) { slice_positions.emplace_back(i); } last_index = sorted_indices[i].first; } - size_t thread_num = 8; - if (slice_positions.size() < thread_num) { - thread_num = slice_positions.size(); + if (use_multi_threads) { + RunMultiThreadReduceSparseGradient(origin_sparse_grad, unique_grad, outer_dim, &sorted_indices, &slice_positions); + } else { + size_t max_length = sorted_indices.size() * outer_dim; + WorkerParamsForReduceSparseGradient params{0, + slice_positions.size(), + max_length, + outer_dim, + &sorted_indices, + &slice_positions, + origin_sparse_grad.value_, + unique_grad}; + 
WorkerForReduceSparseGradient(params); } - size_t stride = (slice_positions.size() + thread_num - 1) / thread_num; - thread_num = (slice_positions.size() + stride - 1) / stride; - std::vector threads; - size_t max_length = sorted_indices.size() * outer_dim; - for (size_t i = 0; i < thread_num; ++i) { - size_t slice_start = i * stride; - size_t slice_end = 0; - if (i == thread_num - 1) { - slice_end = slice_positions.size(); - } else { - slice_end = slice_start + stride; + unique_grad->indices_size_ = slice_positions.size(); + MS_LOG(DEBUG) << "End"; +} + +void ReduceMultiSparseGradient(const std::vector> &unique_slice_grads, + SparseGradient *tmp_grad, SparseGradient *unique_grad, size_t first_dim, + size_t outer_dim) { + MS_LOG(DEBUG) << "Start"; + if (unique_slice_grads.empty()) { + return; + } + size_t index_data_size = outer_dim * sizeof(float); + size_t unique_indices_size = 0; + for (size_t i = 0; i < unique_slice_grads.size(); ++i) { + auto &slice_grad = unique_slice_grads[i]; + auto ret_code = memcpy_s(tmp_grad->value_ + unique_indices_size * outer_dim, + (tmp_grad->indices_size_ - unique_indices_size) * index_data_size, slice_grad->value_, + slice_grad->indices_size_ * index_data_size); + if (ret_code != EOK) { + MS_LOG(EXCEPTION) << "Failed to copy data!"; } - WorkerParamsForReduceSparseGradient params{ - slice_start, slice_end, max_length, outer_dim, &sorted_indices, &slice_positions, origin_sparse_grad.value_, - unique_grad}; - threads.emplace_back(std::thread(WorkerForReduceSparseGradient, params)); + ret_code = + memcpy_s(tmp_grad->indices_ + unique_indices_size, (tmp_grad->indices_size_ - unique_indices_size) * sizeof(int), + slice_grad->indices_, slice_grad->indices_size_ * sizeof(int)); + if (ret_code != EOK) { + MS_LOG(EXCEPTION) << "Failed to copy data!"; + } + unique_indices_size += slice_grad->indices_size_; + } + tmp_grad->indices_size_ = unique_indices_size; + ReduceSparseGradient(*tmp_grad, unique_grad, first_dim, outer_dim); + MS_LOG(DEBUG) 
<< "End"; +} + +void TwoLevelReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *tmp_grad, + SparseGradient *unique_grad, size_t first_dim, size_t outer_dim) { + MS_LOG(DEBUG) << "Start"; + MS_EXCEPTION_IF_NULL(origin_sparse_grad.value_); + MS_EXCEPTION_IF_NULL(origin_sparse_grad.indices_); + MS_EXCEPTION_IF_NULL(unique_grad); + MS_EXCEPTION_IF_NULL(unique_grad->value_); + MS_EXCEPTION_IF_NULL(unique_grad->indices_); + MS_EXCEPTION_IF_NULL(tmp_grad); + MS_EXCEPTION_IF_NULL(tmp_grad->value_); + MS_EXCEPTION_IF_NULL(tmp_grad->indices_); + size_t thread_num = 24; + if (origin_sparse_grad.indices_size_ < thread_num) { + thread_num = origin_sparse_grad.indices_size_; + } + size_t thread_indices_size = origin_sparse_grad.indices_size_ / thread_num; + size_t left_indices_size = origin_sparse_grad.indices_size_ % thread_num; + std::vector threads; + threads.reserve(thread_num); + std::vector> unique_slice_grads; + for (size_t i = 0; i < thread_num; ++i) { + size_t indices_size = thread_indices_size; + if (i == thread_num - 1) { + indices_size = thread_indices_size + left_indices_size; + } + size_t value_offset = i * thread_indices_size * outer_dim; + size_t indices_offset = i * thread_indices_size; + auto slice_grad = SparseGradient( + {origin_sparse_grad.value_ + value_offset, origin_sparse_grad.indices_ + indices_offset, indices_size}); + unique_slice_grads.emplace_back(std::make_shared()); + unique_slice_grads[i]->value_ = unique_grad->value_ + value_offset; + unique_slice_grads[i]->indices_ = unique_grad->indices_ + indices_offset; + unique_slice_grads[i]->indices_size_ = indices_size; + threads.emplace_back( + std::thread(ReduceSparseGradient, slice_grad, unique_slice_grads[i].get(), first_dim, outer_dim, false)); } for (size_t i = 0; i < thread_num; ++i) { threads[i].join(); } - unique_grad->indices_size_ = slice_positions.size(); + ReduceMultiSparseGradient(unique_slice_grads, tmp_grad, unique_grad, first_dim, outer_dim); + MS_LOG(DEBUG) 
<< "End"; } std::pair GetKernelInput(const AnfNodePtr &anf_node, size_t index) { @@ -892,5 +991,39 @@ void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputePa threads[i].join(); } } + +std::vector GetReduceAttrAxis(const CNodePtr &cnode) { + if (AnfAlgo::GetInputTensorNum(cnode) != AnfAlgo::GetOutputTensorNum(cnode) && + AnfAlgo::GetInputTensorNum(cnode) != 1) { + MS_LOG(EXCEPTION) << "the kind of reduce node [" << cnode->DebugString() + << "] is not single input or single output "; + } + std::vector axis; + auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode, 0); + auto primitive = AnfAlgo::GetCNodePrimitive(cnode); + MS_EXCEPTION_IF_NULL(primitive); + auto axis_attr = primitive->GetAttr(kAxis); + if (axis_attr == nullptr) { + MS_LOG(ERROR) << "This node does't have axie attr."; + return std::vector(); + } + auto type = axis_attr->type(); + MS_EXCEPTION_IF_NULL(type); + std::vector axis_list; + if (type->ToString() == kTypeInt32) { + axis_list.emplace_back(GetValue(axis_attr)); + } else { + axis_list = GetValue>(axis_attr); + } + for (const auto &elem : axis_list) { + if (elem < 0) { + axis.emplace_back(input_shape.size() + elem); + } else { + axis.emplace_back(elem); + } + } + AnfAlgo::SetNodeAttr(kAttrAxis, MakeValue(axis), cnode); + return axis; +} } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/common_utils.h b/mindspore/ccsrc/backend/kernel_compiler/common_utils.h similarity index 83% rename from mindspore/ccsrc/kernel/common_utils.h rename to mindspore/ccsrc/backend/kernel_compiler/common_utils.h index e9d72848f68..8c9ea84b34e 100644 --- a/mindspore/ccsrc/kernel/common_utils.h +++ b/mindspore/ccsrc/backend/kernel_compiler/common_utils.h @@ -26,9 +26,9 @@ #include #include #include -#include "kernel/kernel.h" -#include "kernel/oplib/opinfo.h" -#include "kernel/kernel_build_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/oplib/opinfo.h" +#include 
"backend/kernel_compiler/kernel_build_info.h" namespace mindspore { namespace kernel { @@ -115,7 +115,7 @@ int Sign(float x); void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim, size_t outer_dim); void ReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim, - size_t outer_dim); + size_t outer_dim, bool use_multi_threads = true); std::pair GetKernelInput(const AnfNodePtr &anf_node, size_t index); std::vector>> GetInputIndex(const std::vector &node_list, const std::vector &input_list); @@ -130,6 +130,15 @@ void GetGraphRealOutput(const FuncGraphPtr &func_graph, std::vector> *sorted_indices, + std::vector *slice_positions); +void ReduceMultiSparseGradient(const std::vector> &unique_slice_grads, + SparseGradient *tmp_grad, SparseGradient *unique_grad, size_t first_dim, + size_t outer_dim); +void TwoLevelReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *tmp_grad, + SparseGradient *unique_grad, size_t first_dim, size_t outer_dim); +std::vector GetReduceAttrAxis(const CNodePtr &cnode); } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/addn_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/addn_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/addn_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/addn_cpu_kernel.cc index 5b3194608e7..1300847d40f 100644 --- a/mindspore/ccsrc/kernel/cpu/addn_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/addn_cpu_kernel.cc @@ -14,9 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/cpu/addn_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" -#include "ir/primitive.h" +#include "backend/kernel_compiler/cpu/addn_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/addn_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/addn_cpu_kernel.h similarity index 93% rename from mindspore/ccsrc/kernel/cpu/addn_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/addn_cpu_kernel.h index 1a1a9157d9c..925f0fab507 100644 --- a/mindspore/ccsrc/kernel/cpu/addn_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/addn_cpu_kernel.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_ADDN_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/allgather_cpu_kernel.cc similarity index 92% rename from mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/allgather_cpu_kernel.cc index 9cc5126c082..55afecb8fac 100644 --- a/mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/allgather_cpu_kernel.cc @@ -13,10 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/allgather_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" -#include "device/cpu/mpi/mpi_adapter.h" -#include "ir/primitive.h" +#include "backend/kernel_compiler/cpu/allgather_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" +#include "runtime/device/cpu/mpi/mpi_adapter.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/allgather_cpu_kernel.h similarity index 92% rename from mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/allgather_cpu_kernel.h index 1dddf810efe..42c83ccf0b0 100644 --- a/mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/allgather_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.cc similarity index 90% rename from mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.cc index 3cd6c574139..c1ff8d54bdd 100644 --- a/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/apply_momentum_cpu_kernel.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.h index c0ca581974a..23e84888909 100644 --- a/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_cpu_kernel.cc index ee328df7218..d67c4d47ff9 100644 --- a/mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_cpu_kernel.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/argmax_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/argmax_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_cpu_kernel.h similarity index 92% rename from mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_cpu_kernel.h index aae7435c5c6..3883344f961 100644 --- a/mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_ARGMAX_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_cpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_cpu_kernel.cc index 00f30172311..f42bb6807d1 100644 --- a/mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_cpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/cpu/bias_add_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/bias_add_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_cpu_kernel.h similarity index 93% rename from mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_cpu_kernel.h index 516a21147ba..c572f68230d 100644 --- a/mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_cpu_kernel.h @@ -18,8 +18,8 @@ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.cc index 1d9c7d076ee..8b6e2d01887 100644 --- a/mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/cpu/bias_add_grad_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.h similarity index 93% rename from mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.h index e3ac8960967..a5743879a7d 100644 --- a/mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.h @@ -19,8 +19,8 @@ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/concat_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/cpu/concat_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.cc index d8f2ef421bf..6776c0f154e 100644 --- a/mindspore/ccsrc/kernel/cpu/concat_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.cc @@ -14,9 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/cpu/concat_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" -#include "ir/primitive.h" +#include "backend/kernel_compiler/cpu/concat_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/concat_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/cpu/concat_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.h index 46f9078178d..94e4ad40f37 100644 --- a/mindspore/ccsrc/kernel/cpu/concat_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_CONCAT_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc index 2be05038d66..fb9398e7c45 100644 --- a/mindspore/ccsrc/kernel/cpu/cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h similarity index 93% rename from mindspore/ccsrc/kernel/cpu/cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h index 08365298401..f2aa292c6e6 100644 --- a/mindspore/ccsrc/kernel/cpu/cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h @@ -21,9 +21,9 @@ #include #include #include -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" #include "ir/anf.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" using mindspore::kernel::Address; using mindspore::kernel::AddressPtr; @@ -55,7 +55,7 @@ class CPUKernel : public kernel::KernelMod { public: CPUKernel() = default; ~CPUKernel() override = default; - void Init(const CNodePtr &kernel_node); + virtual void Init(const CNodePtr &kernel_node); virtual void InitKernel(const CNodePtr &kernel_node) = 0; bool Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs, void * /*stream_ptr*/) override { diff --git a/mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc index bcda7af9fd9..accd7429760 100644 --- a/mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" #include #include #include -#include "device/kernel_info.h" +#include "runtime/device/kernel_info.h" namespace mindspore { namespace kernel { @@ -38,7 +38,7 @@ void CPUKernelFactory::Register(const std::string &kernel_name, const KernelAttr } std::shared_ptr CPUKernelFactory::Create(const std::string &kernel_name, const CNodePtr &apply_kernel) { - auto kernel_info = apply_kernel->kernel_info(); + auto kernel_info = dynamic_cast(apply_kernel->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); const KernelBuildInfo *kernel_build_Info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(kernel_build_Info); diff --git a/mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.h similarity index 85% rename from mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.h index 52eda12ba7c..80f9a342acd 100644 --- a/mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.h @@ -24,8 +24,8 @@ #include #include "common/utils.h" -#include "kernel/cpu/cpu_kernel.h" -#include "device/cpu/kernel_select_cpu.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "runtime/device/cpu/kernel_select_cpu.h" namespace mindspore { namespace kernel { @@ -62,10 +62,12 @@ class CPUKernelRegistrar { static const CPUKernelRegistrar g_cpu_kernel_##COUNT##_reg(#OPNAME, ATTR, \ []() { return std::make_shared(); }); -#define MS_REG_CPU_KERNEL_T(OPNAME, ATTR, OPCLASS, T) \ +#define MS_REG_CPU_KERNEL_T(OPNAME, ATTR, OPCLASS, T) MS_REG_CPU_KERNEL_T_(__COUNTER__, OPNAME, ATTR, OPCLASS, T) +#define MS_REG_CPU_KERNEL_T_(COUNT, OPNAME, ATTR, OPCLASS, T) _MS_REG_CPU_KERNEL_T_(COUNT, OPNAME, ATTR, OPCLASS, T) +#define _MS_REG_CPU_KERNEL_T_(COUNT, OPNAME, ATTR, OPCLASS, T) \ 
static_assert(std::is_base_of>::value, " must be base of CPUKernel"); \ - static const CPUKernelRegistrar g_cpu_kernel_##OPNAME##_##T##_reg(#OPNAME, ATTR, \ - []() { return std::make_shared>(); }); + static const CPUKernelRegistrar g_cpu_kernel_##COUNT##_##OPNAME##_##T##_reg( \ + #OPNAME, ATTR, []() { return std::make_shared>(); }); #define MS_REG_CPU_KERNEL_T_S(OPNAME, ATTR, OPCLASS, T, S) \ static_assert(std::is_base_of>::value, " must be base of CPUKernel"); \ diff --git a/mindspore/ccsrc/kernel/cpu/debug_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/debug_cpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/cpu/debug_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/debug_cpu_kernel.cc index a1dcaca3f31..344f03cc53c 100644 --- a/mindspore/ccsrc/kernel/cpu/debug_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/debug_cpu_kernel.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/debug_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/debug_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" #include "common/utils.h" #ifdef ENABLE_DEBUGGER #include "debug/debugger/debugger.h" diff --git a/mindspore/ccsrc/kernel/cpu/debug_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/debug_cpu_kernel.h similarity index 92% rename from mindspore/ccsrc/kernel/cpu/debug_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/debug_cpu_kernel.h index da9f3286b95..18302e8992f 100644 --- a/mindspore/ccsrc/kernel/cpu/debug_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/debug_cpu_kernel.h @@ -18,8 +18,8 @@ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.cc index 07da3dcc25a..1bcc36faa4b 100644 --- a/mindspore/ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.cc @@ -14,10 +14,9 @@ * limitations under the License. 
*/ #include -#include "kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" -#include "device/cpu/mpi/mpi_adapter.h" -#include "ir/primitive.h" +#include "backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" +#include "runtime/device/cpu/mpi/mpi_adapter.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.h similarity index 93% rename from mindspore/ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.h index 7222bd9be18..3e3807f58e7 100644 --- a/mindspore/ccsrc/kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.cc index c8c2c667add..b2feb9204fd 100644 --- a/mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.cc @@ -15,9 +15,9 @@ */ #include #include -#include "kernel/cpu/embedding_look_up_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" -#include 
"device/cpu/mpi/mpi_adapter.h" +#include "backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" +#include "runtime/device/cpu/mpi/mpi_adapter.h" #include "ir/primitive.h" namespace mindspore { @@ -36,7 +36,9 @@ void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) { } output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); axis_ = 4 - input_shape_.size(); - reduce_scatter_flag_ = AnfAlgo::GetNodeAttr(kernel_node, "reduce_scatter_flag"); + if (AnfAlgo::HasNodeAttr(kAttrReduceScatterFlag, kernel_node)) { + reduce_scatter_flag_ = AnfAlgo::GetNodeAttr(kernel_node, kAttrReduceScatterFlag); + } #ifdef ENABLE_MPI if (reduce_scatter_flag_) { size_t gatherv2_out_lens = 1; @@ -65,7 +67,9 @@ void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_LOG(EXCEPTION) << "Not Enable MPI, please build version with -M on when set reduce_scatter_flag true"; } #endif - offset_ = AnfAlgo::GetNodeAttr(kernel_node, "offset"); + if (AnfAlgo::HasNodeAttr(kAttrOffset, kernel_node)) { + offset_ = AnfAlgo::GetNodeAttr(kernel_node, kAttrOffset); + } CPUKernelUtils::ExpandDimsTo4(&input_shape_); CPUKernelUtils::ExpandDimsTo4(&output_shape_); } diff --git a/mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h index d839571caac..6c61ee346c4 100644 --- a/mindspore/ccsrc/kernel/cpu/embedding_look_up_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include 
"backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/equal_count_cpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/equal_count_cpu_kernel.cc index 60e7eafa789..a61cd185c61 100644 --- a/mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/equal_count_cpu_kernel.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "kernel/cpu/equal_count_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/equal_count_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/equal_count_cpu_kernel.h similarity index 92% rename from mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/equal_count_cpu_kernel.h index 13083889d0c..6e4ed6d5f17 100644 --- a/mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/equal_count_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_EQUAL_COUNT_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/gather_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/gather_cpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/cpu/gather_cpu_kernel.cc rename to 
mindspore/ccsrc/backend/kernel_compiler/cpu/gather_cpu_kernel.cc index 28090817cb1..73b11f1c01f 100644 --- a/mindspore/ccsrc/kernel/cpu/gather_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/gather_cpu_kernel.cc @@ -13,9 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "kernel/cpu/gather_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" -#include "ir/primitive.h" +#include "backend/kernel_compiler/cpu/gather_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/gather_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/gather_cpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/cpu/gather_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/gather_cpu_kernel.h index 2ffd7df4d49..8fdac0dfdec 100644 --- a/mindspore/ccsrc/kernel/cpu/gather_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/gather_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_GATHER_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.cc index 657c85dc484..e58b1d319cd 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/conv2d_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.h" #include #include "common/utils.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.h index 1cb100299e9..c0c64ba4dae 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc index fbfebaf56ee..3fa6a914058 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h" #include #include "common/utils.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h index 49559f452b2..ae8269c1424 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc index ff0b8633d4c..1f02d70f86e 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h" #include -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h index 9fb024a279d..6f699130a8b 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.cc index 0a343785f75..626fd1934e2 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/lstm_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.h" #include #include "common/utils.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.h index d42ff803f07..761494a931f 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.h @@ -24,7 +24,7 @@ #endif #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { class LstmCPUKernel : public MKLCPUKernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.cc index d7e7701d85d..56da8ec808c 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.h" #include #include #include #include #include "common/utils.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.h index 1f3fb824c0c..b95b5ba7920 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.cc index 28266f2aa06..4bbaa6459f5 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/matmul_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.h" #include #include -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" #include "common/utils.h" -#include "device/cpu/cpu_device_address.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.h index 10276d01fa3..ef52f652d08 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.cc index a38470e3a3a..c71abe809de 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" #include #include #include #include "common/utils.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h index 10a860afff5..fc7128b10e1 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h @@ -21,8 +21,8 @@ #include #include #include "dnnl.hpp" -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.cc index 5ae9791b127..777668f9605 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" #include "utils/log_adapter.h" #include "dnnl.hpp" diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h similarity index 100% rename from mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.cc index 4f77508004d..fddd769047a 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/mul_cpu_kernel.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h index 1131fd594c2..182679f59d5 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.cc index 5225050dc18..e4bedf23b94 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/pooling_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.h" #include #include #include "common/utils.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.h index 4993d0834db..8187eaffdab 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.cc index c0459de7908..8189df07ffa 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.cc @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/pooling_grad_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.h" #include #include #include #include "common/utils.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.h index cdb2c69ef0e..95a7bb3f66c 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.h @@ -19,7 +19,7 @@ #include #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.cc similarity index 91% rename from mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.cc index d5ef20a25e2..29ac9a10628 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/relu_cpu_kernel.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h index 26905e267d0..a2da2480e2b 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.cc index 4a6213ddf2b..9139aa78624 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/relu_grad_cpu_kernel.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h index f0a77ee2822..c895ab2756d 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.cc similarity index 92% rename from mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.cc index 7fa740cfc05..94271b8a699 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/softmax_cpu_kernel.h" -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.h index 6acb9e5b9b5..2812dd31af2 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc index 05b1a79924d..889e2abdec8 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h" #include #include #include -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h index f663508059f..d05cb49b7b2 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc index c33fcd246f8..b8bf7b318ac 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc @@ -13,12 
+13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h" #include #include #include -#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h index 6391b27de67..0d79b0514b0 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h @@ -18,7 +18,7 @@ #include #include -#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/one_hot_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/one_hot_cpu_kernel.cc index 00dfe73f28b..5bbc9f49a24 100644 --- a/mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/one_hot_cpu_kernel.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing 
permissions and * limitations under the License. */ -#include "kernel/cpu/one_hot_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/one_hot_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/one_hot_cpu_kernel.h similarity index 93% rename from mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/one_hot_cpu_kernel.h index ef130473433..393b0e8c41b 100644 --- a/mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/one_hot_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_ONE_HOT_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/apply_momentum_ps_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/apply_momentum_ps_kernel.cc new file mode 100644 index 00000000000..6537c888408 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/apply_momentum_ps_kernel.cc @@ -0,0 +1,33 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/cpu/ps/apply_momentum_ps_kernel.h" + +namespace mindspore { +namespace kernel { +namespace ps { +bool ApplyMomentumPSKernel::Execute(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) { + return Launch(inputs, workspace, outputs); +} + +const std::vector &ApplyMomentumPSKernel::input_sizes() const { return GetInputSizeList(); } + +const std::vector &ApplyMomentumPSKernel::output_sizes() const { return GetOutputSizeList(); } + +const std::vector &ApplyMomentumPSKernel::workspace_sizes() const { return GetWorkspaceSizeList(); } +} // namespace ps +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/apply_momentum_ps_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/apply_momentum_ps_kernel.h new file mode 100644 index 00000000000..a78f40d04ba --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/apply_momentum_ps_kernel.h @@ -0,0 +1,43 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_PS_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_PS_KERNEL_H_ + +#include +#include +#include "backend/kernel_compiler/cpu/ps/pserver_kernel.h" +#include "backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.h" + +namespace mindspore { +namespace kernel { +namespace ps { +class ApplyMomentumPSKernel : public ApplyMomentumCPUKernel, public PServerKernel { + public: + ApplyMomentumPSKernel(size_t rank_id, size_t pserver_num) : PServerKernel(rank_id, pserver_num) {} + ~ApplyMomentumPSKernel() override = default; + + bool Execute(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + const std::vector &input_sizes() const override; + const std::vector &output_sizes() const override; + const std::vector &workspace_sizes() const override; +}; +} // namespace ps +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_PS_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.cc new file mode 100644 index 00000000000..59ab65014be --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.cc @@ -0,0 +1,75 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "backend/kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.h" +#include +#include "frontend/parallel/ps/worker.h" + +namespace mindspore { +namespace kernel { +namespace ps { +void EmbeddingLookUpProxyKernel::InitKernel(const CNodePtr &kernel_node) { + EmbeddingLookUpCPUKernel::InitKernel(kernel_node); + + for (auto dim : input_shape_) { + input_dims_ *= dim; + } + + if (mindspore::parallel::ps::Util::IsRoleOfWorker()) { + key_ = AnfAlgo::GetNodeAttr(kernel_node, kAttrPsKey); + } + std::vector keys{key_, key_, key_}; + std::vector values; + values.insert(values.end(), input_shape_.begin(), input_shape_.end()); + values.insert(values.end(), indices_shape_.begin(), indices_shape_.end()); + values.insert(values.end(), output_shape_.begin(), output_shape_.end()); + std::vector lens{SizeToInt(input_shape_.size()), SizeToInt(indices_shape_.size()), + SizeToInt(output_shape_.size())}; + const char *env_role = getenv(mindspore::parallel::ps::kEnvRole); + if (env_role != nullptr && strcmp(env_role, mindspore::parallel::ps::kEnvRoleOfWorker) == 0) { + parallel::ps::Worker::GetInstance().AddEmbeddingTable(key_, input_shape_[axis_]); + parallel::ps::Worker::GetInstance().InitPSEmbeddingTable(keys, values, lens); + } +} + +bool EmbeddingLookUpProxyKernel::Launch(const std::vector &inputs, + const std::vector & /*workspace*/, + const std::vector &outputs) { + auto indices_addr = reinterpret_cast(inputs[1]->addr); + auto output_addr = reinterpret_cast(outputs[0]->addr); + size_t input_size = inputs[1]->size; + size_t output_size = outputs[0]->size; + + size_t size = input_size / sizeof(float); + ::ps::SArray lookup_ids(size, 0); + ::ps::SArray lengths{size}; + ::ps::SArray lookup_result; + + auto ret = memcpy_s(lookup_ids.data(), input_size, indices_addr, input_size); + if (ret != EOK) { + MS_LOG(EXCEPTION) << "Lookup id memcpy failed."; + } + parallel::ps::Worker::GetInstance().DoPSEmbeddingLookup({key_}, lookup_ids, lengths, lookup_result, + 
parallel::ps::kEmbeddingLookupCmd); + + auto ret2 = memcpy_s(output_addr, output_size, lookup_result.data(), output_size); + if (ret2 != EOK) { + MS_LOG(EXCEPTION) << "Lookup result memcpy failed."; + } + return true; +} +} // namespace ps +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.h new file mode 100644 index 00000000000..45e0a23fcb6 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.h @@ -0,0 +1,49 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_PROXY_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_PROXY_KERNEL_H_ + +#include "backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h" +#include +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +namespace ps { +class EmbeddingLookUpProxyKernel : public EmbeddingLookUpCPUKernel { + public: + EmbeddingLookUpProxyKernel() = default; + ~EmbeddingLookUpProxyKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + private: + size_t key_{0}; + size_t input_dims_{1}; +}; + +MS_REG_CPU_KERNEL( + EmbeddingLookupProxy, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), + EmbeddingLookUpProxyKernel); +} // namespace ps +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_PROXY_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.cc new file mode 100644 index 00000000000..bcb3ca8ae8a --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.cc @@ -0,0 +1,87 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.h" +#include +#include +#include +#include "backend/kernel_compiler/common_utils.h" +#include "frontend/parallel/ps/util.h" + +namespace mindspore { +namespace kernel { +namespace ps { +using mindspore::parallel::ps::Util; +void EmbeddingLookUpPSKernel::InitKernel( + const std::shared_ptr>>> &shapes) { + const std::vector>> &shape_vec = *shapes; + input_shape_ = *(shape_vec[0]); + input_lens_ = 1; + for (auto shape : input_shape_) { + input_lens_ = input_lens_ * shape; + } + indices_shape_ = *(shape_vec[1]); + indices_lens_ = 1; + for (auto shape : indices_shape_) { + indices_lens_ = indices_lens_ * shape; + } + output_shape_ = *(shape_vec[2]); + axis_ = 2; + reduce_scatter_flag_ = false; + + size_t offset = 0; + for (size_t i = 0; i < rank_id_; i++) { + offset += Util::LocalShard(input_shape_[axis_], i, pserver_num_); + } + offset_ = offset; + split_num_ = pserver_num_; + + // input shape should be sharded after computing offset_; + Shard(input_shape_, axis_); + + size_t output_size = + std::accumulate(output_shape_.begin(), output_shape_.end(), sizeof(float), std::multiplies()); + output_size_list_.emplace_back(output_size); + CPUKernelUtils::ExpandDimsTo4(&input_shape_); + CPUKernelUtils::ExpandDimsTo4(&output_shape_); +} + +void EmbeddingLookUpPSKernel::ReInit(const std::shared_ptr>>> &shapes) { + const std::vector>> &shape_vec = *shapes; + const auto &indices_shape_ = *(shape_vec[0]); + indices_lens_ = indices_shape_[0]; + + size_t output_size = sizeof(float) * indices_lens_; + for (size_t i = axis_ + 1; i < input_shape_.size(); i++) { + output_size *= input_shape_[i]; + } + output_size_list_.clear(); + output_size_list_.emplace_back(output_size); +} + +bool EmbeddingLookUpPSKernel::Execute(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) { + 
return Launch(inputs, workspace, outputs); +} + +const std::vector &EmbeddingLookUpPSKernel::input_sizes() const { return input_shape_; } + +const std::vector &EmbeddingLookUpPSKernel::output_sizes() const { return GetOutputSizeList(); } + +const std::vector &EmbeddingLookUpPSKernel::workspace_sizes() const { return GetWorkspaceSizeList(); } +} // namespace ps +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.h new file mode 100644 index 00000000000..e23a90a11c3 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.h @@ -0,0 +1,46 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_PS_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_PS_KERNEL_H_ + +#include +#include +#include "backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/ps/pserver_kernel.h" + +namespace mindspore { +namespace kernel { +namespace ps { +class EmbeddingLookUpPSKernel : public EmbeddingLookUpCPUKernel, public PServerKernel { + public: + EmbeddingLookUpPSKernel(size_t rank_id, size_t pserver_num) : PServerKernel(rank_id, pserver_num) {} + ~EmbeddingLookUpPSKernel() override = default; + + void InitKernel(const std::shared_ptr>>> &) override; + void ReInit(const std::shared_ptr>>> &) override; + + bool Execute(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + const std::vector &input_sizes() const override; + const std::vector &output_sizes() const override; + const std::vector &workspace_sizes() const override; +}; +} // namespace ps +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_PS_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.cc new file mode 100644 index 00000000000..3aa421881a0 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.cc @@ -0,0 +1,24 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/cpu/ps/pserver_kernel.h" +#include "frontend/parallel/ps/util.h" + +namespace mindspore { +namespace kernel { +namespace ps {} // namespace ps +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.h new file mode 100644 index 00000000000..a2b6c4fa616 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.h @@ -0,0 +1,57 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_KERNEL_PS_PSERVER_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_PS_PSERVER_KERNEL_H_ + +#include +#include +#include "backend/kernel_compiler/kernel.h" +#include "frontend/parallel/ps/util.h" + +namespace mindspore { +namespace kernel { +namespace ps { +using mindspore::parallel::ps::Util; +class PServerKernel { + public: + PServerKernel(size_t rank_id, size_t pserver_num) : rank_id_(rank_id), pserver_num_(pserver_num) {} + ~PServerKernel() = default; + PServerKernel(const PServerKernel &) = delete; + PServerKernel &operator=(const PServerKernel &) = delete; + + virtual void InitKernel(const std::shared_ptr>>> &) {} + virtual void ReInit(const std::shared_ptr>>> &) {} + virtual bool Execute(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) = 0; + + virtual const std::vector &input_sizes() const = 0; + virtual const std::vector &output_sizes() const = 0; + virtual const std::vector &workspace_sizes() const = 0; + + protected: + virtual void ReInit(const std::vector &) {} + void Shard(std::vector *shape, int axis) { + (*shape)[axis] = Util::LocalShard((*shape)[axis], rank_id_, pserver_num_); + } + + size_t rank_id_; + size_t pserver_num_; +}; +} // namespace ps +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_PS_PSERVER_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pull_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pull_kernel.cc new file mode 100644 index 00000000000..92c901d4c8b --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pull_kernel.cc @@ -0,0 +1,25 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/cpu/ps/pull_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_CPU_KERNEL_T( + Pull, KernelAttr().AddInputAttr(kNumberTypeUInt64).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + PullKernel, float); +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pull_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pull_kernel.h new file mode 100644 index 00000000000..84dd9b819ed --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pull_kernel.h @@ -0,0 +1,85 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_KERNEL_PS_PULL_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_PS_PULL_KERNEL_H_ + +#include +#include +#include "frontend/parallel/ps/worker.h" +#include "frontend/parallel/ps/util.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +template +class PullKernel : public CPUKernel { + public: + PullKernel() : keys_size_(sizeof(size_t)), var_size_(sizeof(size_t)) {} + ~PullKernel() override = default; + + bool Launch(const std::vector &inputs, const std::vector &, const std::vector &) { + // If the paramter is embedding table, don't Pull from PServer. + if (param_name_.find("embedding") == std::string::npos && param_name_.find("wide_w") == std::string::npos) { + parallel::ps::Worker::GetInstance().Pull(key_, inputs[1]->addr, inputs[1]->size); + } + return true; + } + void Init(const CNodePtr &kernel_node) { + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (input_num != 2) { + MS_LOG(ERROR) << "Input number is " << input_num << ", but pull needs 2 inputs."; + return; + } + + auto key_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + for (size_t i = 0; i < key_shape.size(); i++) { + keys_size_ *= key_shape[i]; + } + auto var_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + for (size_t i = 0; i < var_shape.size(); i++) { + var_size_ *= var_shape[i]; + } + auto param_node = AnfAlgo::GetInputNode(kernel_node, 1); + MS_EXCEPTION_IF_NULL(param_node); + param_name_ = param_node->fullname_with_scope(); + + if (mindspore::parallel::ps::Util::IsRoleOfWorker()) { + key_ = AnfAlgo::GetNodeAttr(kernel_node, kAttrPsKey); + } + InitSizeLists(); + return; + } + void InitKernel(const CNodePtr &kernel_node) { return; } + + protected: + void InitSizeLists() { + input_size_list_.push_back(keys_size_); + input_size_list_.push_back(var_size_); + output_size_list_.push_back(0); + } + + private: + size_t key_; + 
size_t keys_size_; + size_t var_size_; + std::string param_name_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_PS_PULL_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/push_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/push_kernel.cc new file mode 100644 index 00000000000..96c1f15bda7 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/push_kernel.cc @@ -0,0 +1,38 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/kernel_compiler/cpu/ps/push_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_CPU_KERNEL_T(Push, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeUInt64), + PushKernel, float); + +MS_REG_CPU_KERNEL_T( + Push, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeUInt64), + PushKernel, float); +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/push_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/push_kernel.h new file mode 100644 index 00000000000..938792f3bfd --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/push_kernel.h @@ -0,0 +1,80 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_KERNEL_PS_PUSH_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_PS_PUSH_KERNEL_H_ + +#include +#include +#include "frontend/parallel/ps/worker.h" +#include "frontend/parallel/ps/util.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +template +class PushKernel : public CPUKernel { + public: + PushKernel() : key_(UINT64_MAX) {} + ~PushKernel() override = default; + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs) { + std::vector keys; + std::vector addrs; + std::vector sizes; + for (auto input : inputs) { + keys.push_back(key_); + addrs.push_back(reinterpret_cast(input->addr)); + sizes.push_back(SizeToInt(input->size) / sizeof(T)); + } + parallel::ps::Worker::GetInstance().Push(keys, addrs, sizes); + memcpy(outputs[0]->addr, &key_, sizeof(size_t)); + return true; + } + + void Init(const CNodePtr &kernel_node) { + key_ = AnfAlgo::GetNodeAttr(kernel_node, kAttrPsKey); + auto optim_input_shapes = AnfAlgo::GetNodeAttr>>(kernel_node, "optim_input_shapes"); + std::vector only_shape_indices = AnfAlgo::GetNodeAttr>(kernel_node, "only_shape_indices"); + MS_LOG(INFO) << "Key " << key_ << " optimizer input shapes are:" << optim_input_shapes; + MS_LOG(INFO) << "Only init shape indices are " << only_shape_indices; + for (size_t i = 0; i < optim_input_shapes.size(); i++) { + auto shape = optim_input_shapes[i]; + mindspore::parallel::ps::Worker::GetInstance().SetOptimInputShapes(key_, shape); + if (std::count(only_shape_indices.begin(), only_shape_indices.end(), i) == 0) { + size_t size = sizeof(T); + for (size_t j = 0; j < shape.size(); j++) { + size *= shape[j]; + } + input_size_list_.push_back(size); + } + } + + output_size_list_.push_back(sizeof(size_t)); + return; + } + + void InitKernel(const CNodePtr &kernel_node) { return; } + + private: + size_t key_; +}; +} // namespace kernel +} // namespace 
mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_PS_PUSH_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_adam_ps_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_adam_ps_kernel.cc new file mode 100644 index 00000000000..c7283954f89 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_adam_ps_kernel.cc @@ -0,0 +1,100 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "backend/kernel_compiler/cpu/ps/sparse_apply_adam_ps_kernel.h" +#include +#include "backend/kernel_compiler/common_utils.h" +#include "runtime/device/cpu/cpu_device_address.h" +#include "frontend/parallel/ps/util.h" + +namespace mindspore { +namespace kernel { +namespace ps { +void SparseApplyAdamPSKernel::InitKernel( + const std::shared_ptr>>> &shapes) { + const std::vector>> &shape_vec = *shapes; + std::vector &var_shape = *(shape_vec[0]); + std::vector &m_shape = *(shape_vec[1]); + std::vector &v_shape = *(shape_vec[2]); + const std::vector &grad_shape = *(shape_vec[9]); + const std::vector &indices_shape = *(shape_vec[10]); + + Shard(&var_shape, 0); + Shard(&m_shape, 0); + Shard(&v_shape, 0); + + if (!IsSameShape(var_shape, m_shape)) { + MS_LOG(EXCEPTION) << "var and m should have the same shape"; + } + if (!IsSameShape(var_shape, v_shape)) { + MS_LOG(EXCEPTION) << "var and v should have the same shape"; + } + var_first_dim_size_ = var_shape[0]; + for (size_t i = 1; i < var_shape.size(); ++i) { + if (var_shape[i] != grad_shape[i]) { + MS_LOG(EXCEPTION) << "The shape of var and grad must equal in dimension " << i; + } + var_outer_dim_size_ *= var_shape[i]; + } + if (indices_shape.size() != 1) { + MS_LOG(EXCEPTION) << "indices must be 1D"; + } + indices_size_ = indices_shape[0]; + if (grad_shape[0] != indices_size_) { + MS_LOG(ERROR) << "The first dimension of grad shape must be equal to indices"; + } + /* + if (AnfAlgo::HasNodeAttr(USE_NESTEROV, kernel_node)) { + use_nesterov_ = AnfAlgo::GetNodeAttr(kernel_node, "use_nesterov"); + } + */ + workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float)); + workspace_size_list_.emplace_back(indices_size_ * sizeof(int)); + workspace_size_list_.emplace_back(var_first_dim_size_ * var_outer_dim_size_ * sizeof(float)); +} + +void SparseApplyAdamPSKernel::ReInit(const std::shared_ptr>>> &shapes) { + const std::vector>> &shape_vec = *shapes; + const std::vector &indices_shape = 
*(shape_vec[0]); + indices_size_ = indices_shape[0]; + workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float); + workspace_size_list_[1] = indices_size_ * sizeof(int); +} + +void SparseApplyAdamPSKernel::ReInit(const std::vector &inputs) { + const auto &indices_addr = inputs[10]; + indices_size_ = indices_addr->size; + workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float); + workspace_size_list_[1] = indices_size_ * sizeof(int); +} + +bool SparseApplyAdamPSKernel::Execute(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) { + ReInit(inputs); + int *indices = reinterpret_cast(inputs[10]->addr); + for (size_t i = 0; i < inputs[10]->size / sizeof(int); i++) { + indices[i] -= rank_id_ * var_first_dim_size_; + } + return Launch(inputs, workspace, outputs); +} + +const std::vector &SparseApplyAdamPSKernel::input_sizes() const { return GetInputSizeList(); } + +const std::vector &SparseApplyAdamPSKernel::output_sizes() const { return GetOutputSizeList(); } + +const std::vector &SparseApplyAdamPSKernel::workspace_sizes() const { return GetWorkspaceSizeList(); } +} // namespace ps +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_adam_ps_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_adam_ps_kernel.h new file mode 100644 index 00000000000..337fcb3bf08 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_adam_ps_kernel.h @@ -0,0 +1,49 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_ADAM_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_ADAM_PS_KERNEL_H_ + +#include +#include +#include "backend/kernel_compiler/cpu/ps/pserver_kernel.h" +#include "backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.h" + +namespace mindspore { +namespace kernel { +namespace ps { +using mindspore::kernel::SparseApplyAdamCPUKernel; +class SparseApplyAdamPSKernel : public SparseApplyAdamCPUKernel, public PServerKernel { + public: + SparseApplyAdamPSKernel(size_t rank_id, size_t pserver_num) : PServerKernel(rank_id, pserver_num) {} + ~SparseApplyAdamPSKernel() override = default; + + void InitKernel(const std::shared_ptr>>> &) override; + void ReInit(const std::shared_ptr>>> &) override; + bool Execute(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + const std::vector &input_sizes() const override; + const std::vector &output_sizes() const override; + const std::vector &workspace_sizes() const override; + + protected: + void ReInit(const std::vector &) override; +}; +} // namespace ps +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_ADAM_PS_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.cc new file mode 100644 index 00000000000..0392bd5a696 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.cc @@ -0,0 
+1,89 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" + +namespace mindspore { +namespace kernel { +namespace ps { +void SparseApplyFtrlPSKernel::InitKernel( + const std::shared_ptr>>> &shapes) { + const std::vector>> &shape_vec = *shapes; + std::vector var_shape = *(shape_vec[0]); + std::vector accum_shape = *(shape_vec[1]); + std::vector linear_shape = *(shape_vec[2]); + std::vector grad_shape = *(shape_vec[3]); + std::vector indices_shape = *(shape_vec[4]); + + Shard(&var_shape, 0); + Shard(&accum_shape, 0); + Shard(&linear_shape, 0); + + var_first_dim_size_ = var_shape[0]; + for (size_t i = 1; i < var_shape.size(); ++i) { + if (var_shape[i] != grad_shape[i]) { + MS_LOG(EXCEPTION) << "The shape of var and grad must equal in dimension " << i; + } + var_outer_dim_size_ *= var_shape[i]; + } + if (indices_shape.size() != 1) { + MS_LOG(EXCEPTION) << "indices must be a 1D vector"; + } + indices_size_ = indices_shape[0]; + if (grad_shape[0] != indices_size_) { + MS_LOG(EXCEPTION) << "The first dimension of grad shape must be equal to indices"; + } + lr_ = 0.01; + l1_ = 1e-8; + l2_ = 1e-8; + lr_power_ = -0.5; + workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float)); + workspace_size_list_.emplace_back(indices_size_ * sizeof(int)); +} + +void 
SparseApplyFtrlPSKernel::ReInit(const std::shared_ptr>>> &shapes) { + const std::vector>> &shape_vec = *shapes; + std::vector indices_shape = *(shape_vec[0]); + indices_size_ = indices_shape[0]; + workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float); + workspace_size_list_[1] = indices_size_ * sizeof(int); +} + +void SparseApplyFtrlPSKernel::ReInit(const std::vector &inputs) { + const auto &indices_addr = inputs[4]; + indices_size_ = indices_addr->size; + workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float); + workspace_size_list_[1] = indices_size_ * sizeof(int); +} + +bool SparseApplyFtrlPSKernel::Execute(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) { + ReInit(inputs); + int *indices = reinterpret_cast(inputs[4]->addr); + for (size_t i = 0; i < inputs[4]->size / sizeof(int); i++) { + indices[i] -= rank_id_ * var_first_dim_size_; + } + return Launch(inputs, workspace, outputs); +} + +const std::vector &SparseApplyFtrlPSKernel::input_sizes() const { return GetInputSizeList(); } + +const std::vector &SparseApplyFtrlPSKernel::output_sizes() const { return GetOutputSizeList(); } + +const std::vector &SparseApplyFtrlPSKernel::workspace_sizes() const { return GetWorkspaceSizeList(); } +} // namespace ps +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.h new file mode 100644 index 00000000000..d97f19d3490 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.h @@ -0,0 +1,50 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_FTRL_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_FTRL_PS_KERNEL_H_ + +#include +#include +#include "backend/kernel_compiler/cpu/ps/pserver_kernel.h" +#include "backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.h" + +namespace mindspore { +namespace kernel { +namespace ps { +using mindspore::kernel::SparseApplyFtrlCPUKernel; +class SparseApplyFtrlPSKernel : public SparseApplyFtrlCPUKernel, public PServerKernel { + public: + SparseApplyFtrlPSKernel(size_t rank_id, size_t pserver_num) : PServerKernel(rank_id, pserver_num) {} + ~SparseApplyFtrlPSKernel() override = default; + + void InitKernel(const std::shared_ptr>>> &) override; + void ReInit(const std::shared_ptr>>> &) override; + + bool Execute(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + const std::vector &input_sizes() const override; + const std::vector &output_sizes() const override; + const std::vector &workspace_sizes() const override; + + protected: + void ReInit(const std::vector &) override; +}; +} // namespace ps +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_FTRL_PS_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/cpu/reduce_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/cpu/reduce_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.cc index e56f2af8c72..0dddf1d3c41 100644 --- 
a/mindspore/ccsrc/kernel/cpu/reduce_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.cc @@ -16,8 +16,8 @@ #include #include #include -#include "kernel/cpu/reduce_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/reduce_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/reduce_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.h similarity index 92% rename from mindspore/ccsrc/kernel/cpu/reduce_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.h index 3317ec72edb..a9696bad49e 100644 --- a/mindspore/ccsrc/kernel/cpu/reduce_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.h @@ -18,8 +18,8 @@ #include #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_scatter_cpu_kernel.cc similarity index 92% rename from mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_scatter_cpu_kernel.cc index 19a4e907a0e..f44c109ace7 100644 --- a/mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_scatter_cpu_kernel.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/reduce_scatter_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" -#include "device/cpu/mpi/mpi_adapter.h" +#include "backend/kernel_compiler/cpu/reduce_scatter_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" +#include "runtime/device/cpu/mpi/mpi_adapter.h" #include "ir/primitive.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_scatter_cpu_kernel.h similarity index 93% rename from mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_scatter_cpu_kernel.h index 5c6907602af..317d7df4433 100644 --- a/mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_scatter_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/reshape_cpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/reshape_cpu_kernel.cc index 7342a19e993..6370fdc78a2 100644 --- a/mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/reshape_cpu_kernel.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/reshape_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/reshape_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/reshape_cpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/reshape_cpu_kernel.h index 6ca746f4acc..04f1db33044 100644 --- a/mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/reshape_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_RESHAPE_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/slice_cpu_kernel.cc similarity index 98% rename from mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/slice_cpu_kernel.cc index d2530430e93..c6657a845a7 100644 --- a/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/slice_cpu_kernel.cc @@ -13,9 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/slice_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" -#include "ir/primitive.h" +#include "backend/kernel_compiler/cpu/slice_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/slice_cpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/slice_cpu_kernel.h index 913c993d7a8..03b7ecdc173 100644 --- a/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/slice_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_SLICE_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/slice_grad_cpu_kernel.cc similarity index 98% rename from mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/slice_grad_cpu_kernel.cc index 92eaffe8c6a..20904e05048 100644 --- a/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/slice_grad_cpu_kernel.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/slice_grad_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/slice_grad_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" #include "ir/primitive.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/slice_grad_cpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/slice_grad_cpu_kernel.h index 1e42c8ac687..ec480d7e80b 100644 --- a/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/slice_grad_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_SLICE_GRAD_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.cc index ef3db782758..2ff8e77fcd1 100644 --- a/mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/sparse_apply_adam_cpu_kernel.h" -#include "kernel/common_utils.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.h" +#include "backend/kernel_compiler/common_utils.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.h index c2770d0ebd0..5d3d4193f75 100644 --- a/mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.h @@ -18,8 +18,8 @@ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { @@ -33,7 +33,7 @@ class SparseApplyAdamCPUKernel : public CPUKernel { bool Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs) override; - private: + protected: size_t indices_size_{0}; size_t var_first_dim_size_{0}; size_t var_outer_dim_size_{1}; diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.cc similarity index 89% rename from mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.cc index 0537e746f3a..2662604e196 100644 --- a/mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.cc @@ -13,9 +13,9 @@ * See the License for the specific language 
governing permissions and * limitations under the License. */ -#include "kernel/cpu/sparse_apply_ftrl_cpu_kernel.h" -#include "kernel/common_utils.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.h" +#include "backend/kernel_compiler/common_utils.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { @@ -66,6 +66,8 @@ void SparseApplyFtrlCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) MS_EXCEPTION_IF_NULL(kernel_node); workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float)); workspace_size_list_.emplace_back(indices_size_ * sizeof(int)); + workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float)); + workspace_size_list_.emplace_back(indices_size_ * sizeof(int)); } void SparseApplyFtrlCPUKernel::InitKernel(const CNodePtr &kernel_node) { @@ -130,9 +132,12 @@ bool SparseApplyFtrlCPUKernel::Launch(const std::vector &inp auto indices = reinterpret_cast(inputs[4]->addr); auto new_grad = reinterpret_cast(workspace[0]->addr); auto new_indices = reinterpret_cast(workspace[1]->addr); + auto tmp_grad = reinterpret_cast(workspace[2]->addr); + auto tmp_indices = reinterpret_cast(workspace[3]->addr); SparseGradient unique_sparse_grad({new_grad, new_indices, indices_size_}); - ReduceSparseGradient(SparseGradient({grad, indices, indices_size_}), &unique_sparse_grad, var_first_dim_size_, - var_outer_dim_size_); + SparseGradient tmp_sparse_grad({tmp_grad, tmp_indices, indices_size_}); + TwoLevelReduceSparseGradient(SparseGradient({grad, indices, indices_size_}), &tmp_sparse_grad, &unique_sparse_grad, + var_first_dim_size_, var_outer_dim_size_); MultiThreadComputeParams input_params; input_params.var_ = var; diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.h similarity index 95% rename from 
mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.h index 9e79dc83c78..af8796d8a5c 100644 --- a/mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_APPLY_FTRL_CPU_KERNEL_H_ #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { @@ -32,7 +32,7 @@ class SparseApplyFtrlCPUKernel : public CPUKernel { bool Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs) override; - private: + protected: size_t indices_size_{0}; size_t var_first_dim_size_{0}; size_t var_outer_dim_size_{1}; diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_lazy_adam_cpu_kernel.cc similarity index 89% rename from mindspore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_lazy_adam_cpu_kernel.cc index 16cb901b048..636d92dcbb1 100644 --- a/mindspore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_lazy_adam_cpu_kernel.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.h" -#include "kernel/common_utils.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/sparse_apply_lazy_adam_cpu_kernel.h" +#include "backend/kernel_compiler/common_utils.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { @@ -61,6 +61,8 @@ void SparseApplyLazyAdamCPUKernel::InitInputOutputSize(const CNodePtr &kernel_no MS_EXCEPTION_IF_NULL(kernel_node); workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float)); workspace_size_list_.emplace_back(indices_size_ * sizeof(int)); + workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float)); + workspace_size_list_.emplace_back(indices_size_ * sizeof(int)); } void SparseApplyLazyAdamCPUKernel::InitKernel(const CNodePtr &kernel_node) { @@ -121,10 +123,13 @@ bool SparseApplyLazyAdamCPUKernel::Launch(const std::vector auto indices = reinterpret_cast(inputs[10]->addr); auto new_grad = reinterpret_cast(workspace[0]->addr); auto new_indices = reinterpret_cast(workspace[1]->addr); + auto tmp_grad = reinterpret_cast(workspace[2]->addr); + auto tmp_indices = reinterpret_cast(workspace[3]->addr); SparseGradient unique_sparse_grad({new_grad, new_indices, indices_size_}); - ReduceSparseGradient(SparseGradient({grad, indices, indices_size_}), &unique_sparse_grad, var_first_dim_size_, - var_outer_dim_size_); + SparseGradient tmp_sparse_grad({tmp_grad, tmp_indices, indices_size_}); + TwoLevelReduceSparseGradient(SparseGradient({grad, indices, indices_size_}), &tmp_sparse_grad, &unique_sparse_grad, + var_first_dim_size_, var_outer_dim_size_); lr = lr * std::sqrt(1 - beta2_power) / (1 - beta1_power); MultiThreadComputeParams input_params; diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_lazy_adam_cpu_kernel.h similarity index 95% rename from 
mindspore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_lazy_adam_cpu_kernel.h index 795568a64dd..ee95db8f332 100644 --- a/mindspore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_lazy_adam_cpu_kernel.h @@ -18,8 +18,8 @@ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc index 6069fb708ed..efba35ad8c0 100644 --- a/mindspore/ccsrc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h" -#include "kernel/common_utils.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h" +#include "backend/kernel_compiler/common_utils.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h index ff7da7966c2..56b180ec0be 100644 --- a/mindspore/ccsrc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h @@ -18,8 +18,8 @@ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/sub_cpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/sub_cpu_kernel.cc index 543f0e5cdda..1e759390a25 100644 --- a/mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sub_cpu_kernel.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ #include -#include "kernel/cpu/sub_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/sub_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/sub_cpu_kernel.h similarity index 92% rename from mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/sub_cpu_kernel.h index 54b2c8951a8..d1b55ded90f 100644 --- a/mindspore/ccsrc/kernel/cpu/sub_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sub_cpu_kernel.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_KERNEL_CPU_SUB_CPU_KERNEL_H_ #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/cpu/transpose_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/cpu/transpose_cpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.cc index f2ac9350cba..8ec3698cf6b 100644 --- a/mindspore/ccsrc/kernel/cpu/transpose_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/cpu/transpose_cpu_kernel.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/cpu/transpose_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { namespace kernel { const size_t kMaxDim = 100; diff --git a/mindspore/ccsrc/kernel/cpu/transpose_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.h similarity index 90% rename from mindspore/ccsrc/kernel/cpu/transpose_cpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.h index d882f4fa510..15796f9f3cd 100644 --- a/mindspore/ccsrc/kernel/cpu/transpose_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.h @@ -18,8 +18,8 @@ #include #include #include -#include "kernel/cpu/cpu_kernel.h" -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { class TransposeCPUFwdKernel : public CPUKernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/argmax_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmax_gpu_kernel.cc similarity index 91% rename from mindspore/ccsrc/kernel/gpu/arrays/argmax_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmax_gpu_kernel.cc index 71f612d07c3..39f535a2af4 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/argmax_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmax_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/arrays/argmax_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/arrays/argmax_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/argmax_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmax_gpu_kernel.h similarity index 93% rename from mindspore/ccsrc/kernel/gpu/arrays/argmax_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmax_gpu_kernel.h index 3df70d0960d..61a53c5b403 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/argmax_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmax_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_ARGMAXGPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/argmax_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/argmax_impl.cuh" namespace mindspore { namespace kernel { #define ARGMAX_MAX_DIMENSION 2 diff --git a/mindspore/ccsrc/kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmaxwithvalue_gpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmaxwithvalue_gpu_kernel.cc index 24c8a9a7301..5ead387ccc4 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmaxwithvalue_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/arrays/argmaxwithvalue_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmaxwithvalue_gpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmaxwithvalue_gpu_kernel.h index 304f0ab1611..d2369023fbb 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/argmaxwithvalue_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_ARGMAXWITHVALUEGPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/argmaxwithvalue_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/argmaxwithvalue_impl.cuh" namespace mindspore { namespace kernel { template diff --git a/mindspore/ccsrc/kernel/gpu/arrays/array_reduce_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/gpu/arrays/array_reduce_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.cc index f3786046243..5d34a1c9c2b 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/array_reduce_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/arrays/array_reduce_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/array_reduce_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h similarity index 98% rename from mindspore/ccsrc/kernel/gpu/arrays/array_reduce_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h index 4a52439305c..b96f63670d6 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/array_reduce_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h @@ -20,9 +20,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { const std::map kReduceTypeMap = { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/concatv2_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/arrays/concatv2_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.cc index 3bca6a69d3c..f5979dc62d0 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/concatv2_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/arrays/concatv2_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/concatv2_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/arrays/concatv2_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h index a91c50ce699..15ccedcaeca 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/concatv2_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_CONCATV2_GPU_KERNEL_H #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/concatv2_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/gather_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gather_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/arrays/gather_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gather_gpu_kernel.cc index dc595e47936..8d3c06e805b 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/gather_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gather_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/arrays/gather_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/arrays/gather_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/gather_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gather_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/arrays/gather_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gather_gpu_kernel.h index 72a05b09155..2211361ceec 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/gather_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/gather_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_GATHER_GPU_KERNEL_H #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/gather.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/gather.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/one_hot_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/gpu/arrays/one_hot_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.cc index 7c160f8f584..e764a08dc88 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/one_hot_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/arrays/one_hot_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/one_hot_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/arrays/one_hot_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h index c8b64e72430..6c46a63e69b 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/one_hot_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_ONEHOT_GPU_KERNEL_H #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/one_hot_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/one_hot_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/select_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/select_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/arrays/select_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/select_gpu_kernel.cc index 41c9c2243f1..3c1323de07a 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/select_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/select_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/arrays/select_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/arrays/select_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/select_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/select_gpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/arrays/select_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/select_gpu_kernel.h index f1b6c5853a1..73e60c44bd6 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/select_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/select_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_SELECT_GPU_KERNEL_H #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/select_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/select_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.cc index 53161c29c2a..4c9ff2b7f4a 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/arrays/slice_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h index 7f71e548add..f8ecb9ccf03 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_SLICE_GPU_KERNEL_H #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/slice_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/slice_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/arrays/slice_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.cc index b91aafb7341..2eeb3acf734 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/slice_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/arrays/slice_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/slice_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/arrays/slice_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h index bf24272d93d..006cbf0266b 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/slice_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_SLICE_GRAD_GPU_KERNEL_H #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/slice_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/transpose_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.cc similarity index 91% rename from mindspore/ccsrc/kernel/gpu/arrays/transpose_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.cc index 338e7a4093b..77e7de6fef5 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/transpose_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/arrays/transpose_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.h" namespace mindspore { namespace kernel { MS_REG_GPU_KERNEL_ONE(Transpose, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), diff --git a/mindspore/ccsrc/kernel/gpu/arrays/transpose_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.h similarity index 93% rename from mindspore/ccsrc/kernel/gpu/arrays/transpose_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.h index 61be9b68fea..0f9c710e3e0 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/transpose_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/transpose_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_TRANSPOSE_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/transpose_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cuh" namespace mindspore { namespace kernel { template diff --git a/mindspore/ccsrc/kernel/gpu/arrays/unsorted_segment_sum_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_sum_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/arrays/unsorted_segment_sum_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_sum_gpu_kernel.cc index 9962d559882..4be887ec79b 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/unsorted_segment_sum_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_sum_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/arrays/unsorted_segment_sum_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/arrays/unsorted_segment_sum_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/arrays/unsorted_segment_sum_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_sum_gpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/arrays/unsorted_segment_sum_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_sum_gpu_kernel.h index a20375ee299..1f7884c6502 100644 --- a/mindspore/ccsrc/kernel/gpu/arrays/unsorted_segment_sum_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unsorted_segment_sum_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_UNSORT_SEGMENT_SUM_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/unsorted_segment_sum.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/control/recv_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/control/recv_gpu_kernel.cc similarity index 92% rename from mindspore/ccsrc/kernel/gpu/control/recv_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/control/recv_gpu_kernel.cc index 5468aa65003..a89d4e9baf6 100644 --- a/mindspore/ccsrc/kernel/gpu/control/recv_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/control/recv_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/control/recv_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/control/recv_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/control/recv_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/control/recv_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/control/recv_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/control/recv_gpu_kernel.h index 12b4eed1326..7de32ade4fb 100644 --- a/mindspore/ccsrc/kernel/gpu/control/recv_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/control/recv_gpu_kernel.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_CONTROL_RECV_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/control/send_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/control/send_gpu_kernel.cc similarity index 92% rename from mindspore/ccsrc/kernel/gpu/control/send_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/control/send_gpu_kernel.cc index c417c30bb33..946038bb183 100644 --- a/mindspore/ccsrc/kernel/gpu/control/send_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/control/send_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/control/send_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/control/send_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/control/send_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/control/send_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/control/send_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/control/send_gpu_kernel.h index a26e41aa1ef..beea19a4352 100644 --- a/mindspore/ccsrc/kernel/gpu/control/send_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/control/send_gpu_kernel.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_CONTROL_SEND_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/adam_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/adam_impl.cu similarity index 97% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/adam_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/adam_impl.cu index 3ec63ee03a7..615b94723d7 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/adam_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/adam_impl.cu @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/cuda_impl/adam_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/adam_impl.cuh" template __device__ __forceinline__ T SqrtFunc(T input) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/adam_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/adam_impl.cuh similarity index 95% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/adam_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/adam_impl.cuh index f48a113c261..7fc4a3e9498 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/adam_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/adam_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_ADAM_IMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_ADAM_IMPL_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void ApplyAdam(const size_t size, const T *gradient, const T *beta1_power, const T *beta2_power, const T *learning_rate, const T *beta1, const T *beta2, const T *epsilon, T *variable, T *m, T *v, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/adam_weight_decay_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/adam_weight_decay_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/adam_weight_decay_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/adam_weight_decay_impl.cu index dfadaa09d6c..3bad9a61e17 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/adam_weight_decay_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/adam_weight_decay_impl.cu @@ -15,7 +15,7 @@ */ #include "adam_weight_decay_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template __global__ void AdamWeightDecayKernel(const int element_num_, const bool need_decay, const float *beta1, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/adam_weight_decay_impl.cuh 
b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/adam_weight_decay_impl.cuh similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/adam_weight_decay_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/adam_weight_decay_impl.cuh diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/argmax_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmax_impl.cu similarity index 95% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/argmax_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmax_impl.cu index e8fab27dda3..a4f1f6680b3 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/argmax_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmax_impl.cu @@ -15,7 +15,7 @@ */ #include "argmax_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" #include "include/cuda_fp16.h" template __global__ void Argmax1D(const T* input, const int channel_size, int* output) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/argmax_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmax_impl.cuh similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/argmax_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmax_impl.cuh diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/argmaxwithvalue_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmaxwithvalue_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/argmaxwithvalue_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmaxwithvalue_impl.cu index 3313fc68532..46a8a75af92 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/argmaxwithvalue_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmaxwithvalue_impl.cu @@ -15,7 +15,7 @@ */ #include "argmaxwithvalue_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" #include "include/cuda_fp16.h" 
template __global__ void ArgmaxWithValue(const T* input, const int bound, int outerSize, int innerSize, S* index, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/argmaxwithvalue_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmaxwithvalue_impl.cuh similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/argmaxwithvalue_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/argmaxwithvalue_impl.cuh diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/assign_add_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/assign_add_impl.cu similarity index 97% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/assign_add_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/assign_add_impl.cu index d44ad99202e..604391ccf31 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/assign_add_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/assign_add_impl.cu @@ -15,7 +15,7 @@ */ #include "assign_add_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" #include "include/cuda_fp16.h" template __global__ void AssignAdd(const size_t size, T* ref, const T* value, T* output) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/assign_add_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/assign_add_impl.cuh similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/assign_add_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/assign_add_impl.cuh diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/batchnorm_fold2_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold2_impl.cu similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/batchnorm_fold2_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold2_impl.cu diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/batchnorm_fold2_impl.cuh 
b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold2_impl.cuh similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/batchnorm_fold2_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold2_impl.cuh index c3ce08dfd07..3a895405b1d 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/batchnorm_fold2_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold2_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_BATCHNORMFOLD2_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_BATCHNORMFOLD2_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void BatchNormFold2Forward(const T *x, const T *beta, const T *gamma, const T *batch_std, const T *batch_mean, const T *running_std, const T *running_mean, const int *global_step, T *y, int freeze_bn, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/batchnorm_fold_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/batchnorm_fold_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold_impl.cu index ddc2803f564..dae9a7d6291 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/batchnorm_fold_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold_impl.cu @@ -18,7 +18,7 @@ #include #include #include "batchnorm_fold_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template __global__ void UpdateRunningStd(int channel_size, const double epsilon, T* running_std) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/batchnorm_fold_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold_impl.cuh similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/batchnorm_fold_impl.cuh rename to 
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold_impl.cuh diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_grad_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_grad_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_grad_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_grad_impl.cu index 5aa087e7f51..262d4c438d7 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_grad_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_grad_impl.cu @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "kernel/gpu/cuda_impl/broadcast_grad_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/broadcast_grad_impl.cuh" +#include "runtime/device/gpu/cuda_common.h" template struct MinimumGradFunc { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_grad_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_grad_impl.cuh similarity index 97% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_grad_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_grad_impl.cuh index d154eddd4cc..77420435922 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_grad_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_grad_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_BROADCAST_GRAD_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_BROADCAST_GRAD_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" enum BroadcastGradOpType { BROADCAST_GRAD_TYPE_MAXIMUM = 0, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_impl.cu rename to 
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu index afa94fc56cd..a72daa42346 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "kernel/gpu/cuda_impl/broadcast_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cuh" +#include "runtime/device/gpu/cuda_common.h" template struct GreaterFunc { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cuh similarity index 97% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cuh index 5f6992511d1..dfc4c75c932 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_BROADCAST_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_BROADCAST_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" enum BroadcastOpType { BROADCAST_TYPE_GREATER = 0, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/concatv2_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/concatv2_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cu index 5cccf183eac..147782591ae 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/concatv2_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cu @@ -17,7 +17,7 @@ #include #include #include -#include "kernel/gpu/cuda_impl/concatv2_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh" template __global__ void Concat(const size_t size, 
const int w1, const int w2, const T* input_1, const T* input_2, T* output) { for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim.x) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/concatv2_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh similarity index 97% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/concatv2_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh index b6932aa4a15..7bd32c140fb 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/concatv2_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_CONCATV2IMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_CONCATV2IMPL_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void ConcatKernel(const size_t size, const int w1, const int w2, const T* input_1, const T* input_2, T* output, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/correction_mul_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/correction_mul_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/correction_mul_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/correction_mul_impl.cu index ac2f99ed9a7..87aaf1351cd 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/correction_mul_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/correction_mul_impl.cu @@ -16,7 +16,7 @@ #include #include "correction_mul_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template __global__ void CorrectionMul(const T* weight, const T* gamma, const T* running_std, const int batchsize, const int chw, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/correction_mul_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/correction_mul_impl.cuh similarity 
index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/correction_mul_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/correction_mul_impl.cuh diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/cross_entropy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/cross_entropy_impl.cu similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/cross_entropy_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/cross_entropy_impl.cu diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/cross_entropy_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/cross_entropy_impl.cuh similarity index 97% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/cross_entropy_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/cross_entropy_impl.cuh index 54ae0728929..cb4ccc2c44a 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/cross_entropy_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/cross_entropy_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_CROSSENTROPY_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_CROSSENTROPY_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void CrossEntropyWithSparse(const T *logits, const S *labels, const size_t batch_size, const size_t class_num, T *loss, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/dropout_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/dropout_impl.cu similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/dropout_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/dropout_impl.cu diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/dropout_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/dropout_impl.cuh similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/dropout_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/dropout_impl.cuh index 
f89d42ce49b..3ba27eeeea3 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/dropout_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/dropout_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_DROPOUT_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_DROPOUT_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void DropoutForward(const T *input, T *mask, T *output, float *mask_f, size_t num_count, float keep_prob, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/equalcount_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/equalcount_impl.cu similarity index 94% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/equalcount_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/equalcount_impl.cu index 38dd79c441d..e6f424c6617 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/equalcount_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/equalcount_impl.cu @@ -15,7 +15,7 @@ */ #include "equalcount_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template __global__ void EqualCount(const int size, const T* input1, const T* input2, T* output) { T equal_count = 0; diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/equalcount_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/equalcount_impl.cuh similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/equalcount_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/equalcount_impl.cuh diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perchannel_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/fake_quant_perchannel_impl.cu similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perchannel_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/fake_quant_perchannel_impl.cu diff --git 
a/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perchannel_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/fake_quant_perchannel_impl.cuh similarity index 95% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perchannel_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/fake_quant_perchannel_impl.cuh index ad2e387b082..e17615db67e 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perchannel_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/fake_quant_perchannel_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_FAKE_QUANT_PERCHANNEL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_FAKE_QUANT_PERCHANNEL_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" void CalNudgePerChannel(float *input_min, float *input_max, const float quant_min, const float quant_max, float *nudge_min, float *nudge_max, float *scale, const int channel_num, const bool symmetric, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perlayer_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/fake_quant_perlayer_impl.cu similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perlayer_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/fake_quant_perlayer_impl.cu diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perlayer_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/fake_quant_perlayer_impl.cuh similarity index 95% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perlayer_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/fake_quant_perlayer_impl.cuh index dda95ed781f..5f6675b2d72 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/fake_quant_perlayer_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/fake_quant_perlayer_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_FAKE_QUANT_PERLAYER_H_ #define 
MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_FAKE_QUANT_PERLAYER_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" void CalNudgePerLayer(float *input_min, float *input_max, const float quant_min, const float quant_max, float *nudge_min, float *nudge_max, float *scale, const bool symmetric, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/float_status_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/float_status_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/float_status_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/float_status_impl.cu index c2fd5ecd70b..bc400eb7049 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/float_status_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/float_status_impl.cu @@ -15,7 +15,7 @@ */ #include "include/cuda_runtime.h" -#include "kernel/gpu/cuda_impl/float_status_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/float_status_impl.cuh" template __global__ void IsNan(const size_t size, const T* input, bool* out) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/float_status_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/float_status_impl.cuh similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/float_status_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/float_status_impl.cuh index da488ff937d..fbe063e72a1 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/float_status_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/float_status_impl.cuh @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_FLOATSTATUS_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_FLOATSTATUS_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void CalFloatStatus(const size_t size, const T *input, T *output, cudaStream_t stream); template diff --git 
a/mindspore/ccsrc/kernel/gpu/cuda_impl/ftrl_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/ftrl_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/ftrl_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/ftrl_impl.cu index ea6ffdbbdc6..be4415d5099 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/ftrl_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/ftrl_impl.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/gpu/cuda_impl/ftrl_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/ftrl_impl.cuh" template __device__ __forceinline__ T PowFunc(T x, T y) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/ftrl_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/ftrl_impl.cuh similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/ftrl_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/ftrl_impl.cuh index ba4a8fa816f..b5f0f82afe6 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/ftrl_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/ftrl_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_FTRL_IMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_FTRL_IMPL_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void ApplyFtrl(const size_t size, const T *gradient, const T *learning_rate, const T *l1_regularization, const T *l2_regularization, const T *learning_rate_power, T *variable, T *accumulation, T *linear, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/gather.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gather.cu similarity index 95% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/gather.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gather.cu index 6bde359d9b0..03b58b81a09 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/gather.cu +++ 
b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gather.cu @@ -15,8 +15,8 @@ */ #include -#include "kernel/gpu/cuda_impl/gather.cuh" -#include "device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/gather.cuh" +#include "runtime/device/gpu/cuda_common.h" template __global__ void GatherKernel(T *input, S *indices, T *output, size_t output_dim0, size_t output_dim1, size_t output_dim2, size_t input_dim1) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/gather.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gather.cuh similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/gather.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gather.cuh diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/gelu_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gelu_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/gelu_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gelu_impl.cu index e460caec9e4..a4dc6648ccd 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/gelu_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gelu_impl.cu @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/gpu/cuda_impl/gelu_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/gelu_impl.cuh" +#include "runtime/device/gpu/cuda_common.h" template __global__ void GeluKernel(size_t size, T *input_addr, T *output_addr) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/gelu_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gelu_impl.cuh similarity index 95% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/gelu_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gelu_impl.cuh index 7a8e1fae8a4..1e69f26d57a 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/gelu_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/gelu_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_GELU_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_GELU_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void Gelu(size_t input_size, T* input_addr, T* output_addr, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_grad_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_grad_impl.cu index e887b98ecaa..fcb74189520 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_grad_impl.cu @@ -17,8 +17,8 @@ #include #include #include -#include "kernel/gpu/cuda_impl/layer_norm_grad_impl.cuh" -#include "kernel/gpu/cuda_impl/layer_norm_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/layer_norm_grad_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/layer_norm_impl.cuh" constexpr int NUM_PER_THREAD_REDUCE = 4; constexpr int WARP_SIZE = 32; diff --git 
a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_grad_impl.cuh similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_grad_impl.cuh index 9f7d57cdb98..13d7a586149 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_grad_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LAYER_NORM_GRAD_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LAYER_NORM_GRAD_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void LayerNormGrad(const int& row_dim, const int& col_dim, const int& param_dim, const T& epsilon, const T* dy, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_impl.cu index cfb60f0ba6e..138300b3034 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_impl.cu @@ -17,7 +17,7 @@ #include #include #include -#include "kernel/gpu/cuda_impl/layer_norm_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/layer_norm_impl.cuh" constexpr int NUM_PER_THREAD_REDUCE = 4; constexpr int WARP_SIZE = 32; diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_impl.cuh similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_impl.cuh index c06a6983843..9548b30d441 100644 --- 
a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/layer_norm_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LAYER_NORM_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LAYER_NORM_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template struct DynamicSharedMem; diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/minmax_update_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/minmax_update_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/minmax_update_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/minmax_update_impl.cu index 27b2cb0232c..3915dba172f 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/minmax_update_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/minmax_update_impl.cu @@ -20,7 +20,7 @@ #include #include #include "minmax_update_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" __global__ void UpdateInputMinMaxPerLayerWithEMA(const float *input_min, const float *input_max, float *output_min, float *output_max, const float min, const float max, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/minmax_update_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/minmax_update_impl.cuh similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/minmax_update_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/minmax_update_impl.cuh index 5e9becab380..b4b4d582eea 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/minmax_update_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/minmax_update_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_MIN_MAX_UPDATE_IMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_MIN_MAX_UPDATE_IMPL_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" void 
CalMinMaxPerChannel(float *input, float *input_min, float *input_max, float *output_min, float *output_max, const int total_num, const int channel_num, const float ema_decay, const bool ema, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/momentum_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/momentum_impl.cu similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/momentum_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/momentum_impl.cu diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/momentum_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/momentum_impl.cuh similarity index 95% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/momentum_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/momentum_impl.cuh index 5405f5ef1d7..62708663ad3 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/momentum_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/momentum_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_MOMENTUMIMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_MOMENTUMIMPL_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void MomentumUpdateVariable(const size_t size, T *variable, T *accumulation, const S *learning_rate, const T *gradient, const S *momentum, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/one_hot_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/one_hot_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/one_hot_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/one_hot_impl.cu index cf5dc7ecd02..6dc4d676f22 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/one_hot_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/one_hot_impl.cu @@ -15,7 +15,7 @@ */ #include "one_hot_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" 
template __global__ void OneHotKernel(size_t size, const S *indices, size_t depth, const T *on_value, const T *off_value, size_t left_dim_size, size_t right_dim_size, T *output) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/one_hot_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/one_hot_impl.cuh similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/one_hot_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/one_hot_impl.cuh diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/pad_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/pad_impl.cu similarity index 98% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/pad_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/pad_impl.cu index ddc615d94ba..3bb4d04a011 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/pad_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/pad_impl.cu @@ -16,7 +16,7 @@ #include #include -#include "kernel/gpu/cuda_impl/pad_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/pad_impl.cuh" template __global__ void Pad(const size_t size, const T* input, const int num, const int channels, const int old_height, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/pad_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/pad_impl.cuh similarity index 97% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/pad_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/pad_impl.cuh index dc3036b8b65..b10804fdab8 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/pad_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/pad_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_PADIMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_PADIMPL_H_ #include -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void CalPad(const size_t size, const T* input, const int num, const int channels, const int 
old_height, diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/random_op_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/random_op_impl.cu new file mode 100644 index 00000000000..6f993945625 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/random_op_impl.cu @@ -0,0 +1,42 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "random_op_impl.cuh" +template +__global__ void NormalKernel(int seed, curandState *globalState, T *output, size_t count) { + for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) { + curand_init(seed, i, 0, &globalState[i]); + output[i] = curand_normal(&globalState[i]); + } + return; +} + +template +void StandardNormal(int seed, int seed2, curandState *globalState, T *output, size_t count, cudaStream_t cuda_stream) { + int RNG_seed = 0; + if (seed2 != 0) { + RNG_seed = seed2; + } else if (seed != 0) { + RNG_seed = seed; + } else { + RNG_seed = time(NULL); + } + NormalKernel<<>>(RNG_seed, globalState, output, count); + return; +} + +template void StandardNormal(int seed, int seed2, curandState *globalState, + float *output, size_t count, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/random_op_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/random_op_impl.cuh new file mode 100644 index 00000000000..b099ead9bf3 
--- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/random_op_impl.cuh @@ -0,0 +1,26 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_RANDOMOPIMPL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_RANDOMOPIMPL_H_ + +#include +#include "runtime/device/gpu/cuda_common.h" + +template +void StandardNormal(int seed, int seed2, curandState *globalState, + T *output, size_t count, cudaStream_t cuda_stream); +#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_RANDOMOPIMPL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/rmsprop_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rmsprop_impl.cu similarity index 97% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/rmsprop_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rmsprop_impl.cu index 913aaa3b8d3..80806b552f3 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/rmsprop_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rmsprop_impl.cu @@ -15,8 +15,8 @@ */ #include -#include "kernel/gpu/cuda_impl/rmsprop_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/rmsprop_impl.cuh" +#include "runtime/device/gpu/cuda_common.h" template __global__ void RmsPropKernel(const T* learning_rate, const T decay, const T momentum, const T epsilon, T* variable, diff --git 
a/mindspore/ccsrc/kernel/gpu/cuda_impl/rmsprop_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rmsprop_impl.cuh similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/rmsprop_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rmsprop_impl.cuh index b5802dbb67f..16ad6113812 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/rmsprop_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rmsprop_impl.cuh @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_RMSPROP_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_RMSPROP_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void RmsProp(const T* learning_rate, const T decay, const T momentum, const T epsilon, T* variable, T* mean_square, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/select_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/select_impl.cu similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/select_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/select_impl.cu index f07a820e756..f7086f80930 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/select_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/select_impl.cu @@ -17,7 +17,7 @@ #include #include #include -#include "kernel/gpu/cuda_impl/select_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/select_impl.cuh" template __global__ void Select(const size_t size, const bool* cond, const T* input_x, const T* input_y, T* output) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/select_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/select_impl.cuh similarity index 95% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/select_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/select_impl.cuh index da2d7d9a7fe..e201ab352ce 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/select_impl.cuh +++ 
b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/select_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SELECT_IMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SELECT_IMPL_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void CalSelect(const size_t size, const bool* cond, const T* input_x, const T* input_y, T* output, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cu similarity index 95% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cu index a0082b84c89..f0c64bfb015 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cu @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh" template __global__ void SigmoidCrossEntropyWithLogitsGradKernel(const size_t size, const T *logits, const S *labels, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh index 2cd4922d258..6b444d6c029 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_GRAD_IMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_GRAD_IMPL_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void SigmoidCrossEntropyWithLogitsGrad(const size_t size, const T *logits, const S *labels, T *outputs, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cu similarity index 94% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cu index 3766f367db1..7425ac38095 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cu +++ 
b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh" template __global__ void SigmoidCrossEntropyWithLogitsKernel(const size_t size, const T *logits, const S *labels, T *outputs) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh index 575605bde00..7e9130857fa 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_IMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_IMPL_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void SigmoidCrossEntropyWithLogits(const size_t size, const T *logits, const S *labels, T *outputs, cudaStream_t cuda_stream); diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/slice_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cu similarity index 99% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/slice_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cu index e49a22bb468..dd4effc1744 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/slice_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cu @@ -18,7 +18,7 @@ #include #include 
#include -#include "kernel/gpu/cuda_impl/slice_impl.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh" template __global__ void Slice4D(const int s1, const int s2, const int s3, const int s4, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/slice_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh similarity index 97% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/slice_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh index 9513d6ed24f..e04f277c3d4 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/slice_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh @@ -19,7 +19,7 @@ #include #include -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/smooth_l1_loss_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/smooth_l1_loss_impl.cu new file mode 100644 index 00000000000..9050044b7f1 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/smooth_l1_loss_impl.cu @@ -0,0 +1,64 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "smooth_l1_loss_impl.cuh" +#include "runtime/device/gpu/cuda_common.h" + +template +__global__ void SmoothL1LossKernel(const int input_size, const float sigma, const T *prediction, const T *target, + T *loss) { + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) { + T value = (prediction[i] - target[i]) > 0 ? (prediction[i] - target[i]) : (target[i] - prediction[i]); + if (value < sigma) { + loss[i] = static_cast(0.5) * value * value; + } else { + loss[i] = value - static_cast(0.5); + } + } +} + +template +void SmoothL1Loss(const int &input_size, const float &sigma, const T *prediction, const T *target, T *loss, + cudaStream_t stream) { + SmoothL1LossKernel<<>>(input_size, sigma, prediction, target, loss); +} + +template +__global__ void SmoothL1LossGradKernel(const int input_size, const float sigma, const T *prediction, const T *target, + const T *dloss, T *dx) { + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < input_size; i += blockDim.x * gridDim.x) { + T value = prediction[i] - target[i]; + if (value > static_cast(sigma)) { + dx[i] = dloss[i]; + } else if (value < static_cast(-sigma)) { + dx[i] = -dloss[i]; + } else { + dx[i] = value * dloss[i]; + } + } +} + +template +void SmoothL1LossGrad(const int &input_size, const float &sigma, const T *prediction, const T *target, const T *dloss, + T *dx, cudaStream_t stream) { + SmoothL1LossGradKernel<<>>(input_size, sigma, prediction, target, + dloss, dx); +} + +template void SmoothL1Loss(const int &input_size, const float &sigma, const float *prediction, const float *target, + float *loss, cudaStream_t stream); +template void SmoothL1LossGrad(const int &input_size, const float &sigma, const float *prediction, const float *target, + const float *dloss, float *dx, cudaStream_t stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/smooth_l1_loss_impl.cuh 
b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/smooth_l1_loss_impl.cuh new file mode 100644 index 00000000000..7938e18a3b4 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/smooth_l1_loss_impl.cuh @@ -0,0 +1,25 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SMOOTH_L1_LOSS_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SMOOTH_L1_LOSS_H_ +template +void SmoothL1Loss(const int &input_size, const float &sigma, const T *prediction, const T *target, T *loss, + cudaStream_t stream); +template +void SmoothL1LossGrad(const int &input_size, const float &sigma, const T *prediction, const T *target, const T *dloss, + T *dx, cudaStream_t stream); +#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SMOOTH_L1_LOSS_H_ diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/sparse_cross_entropy_cuda_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sparse_cross_entropy_cuda_impl.cu similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/sparse_cross_entropy_cuda_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sparse_cross_entropy_cuda_impl.cu diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/sparse_cross_entropy_cuda_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sparse_cross_entropy_cuda_impl.cuh similarity index 96% rename from 
mindspore/ccsrc/kernel/gpu/cuda_impl/sparse_cross_entropy_cuda_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sparse_cross_entropy_cuda_impl.cuh index d16131470c5..fa322603812 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/sparse_cross_entropy_cuda_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sparse_cross_entropy_cuda_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPARSECROSSENTROPYCUDAIMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPARSECROSSENTROPYCUDAIMPL_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void CalCrossEntropy(const float *logits, T *labels, const int batch_size, const int class_num, float *loss, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/transpose_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cu similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/transpose_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cu index a0fea901360..ffcb2c80524 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/transpose_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cu @@ -16,7 +16,7 @@ #include #include "transpose_impl.cuh" -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template __global__ void Transpose(const int size, const T* input, const int* input_shape, const int* input_axis, const int shape_size, T* output) { diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/transpose_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cuh similarity index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/transpose_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cuh diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/unary_op_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unary_op_impl.cu similarity 
index 100% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/unary_op_impl.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unary_op_impl.cu diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/unary_op_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unary_op_impl.cuh similarity index 97% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/unary_op_impl.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unary_op_impl.cuh index 623b1a8c03e..cf8b30866e7 100755 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/unary_op_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unary_op_impl.cuh @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_UNARYOPIMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_UNARYOPIMPL_H_ -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void Exponential(T *input, T *output, size_t count, cudaStream_t cuda_stream); template diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/unsorted_segment_sum.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cu similarity index 97% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/unsorted_segment_sum.cu rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cu index a7affd47056..3d299c23520 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/unsorted_segment_sum.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cu @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/cuda_impl/unsorted_segment_sum.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cuh" template __global__ void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0, size_t output_dim1, diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/unsorted_segment_sum.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cuh similarity index 96% rename from mindspore/ccsrc/kernel/gpu/cuda_impl/unsorted_segment_sum.cuh rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cuh index ef950329969..315677fde41 100644 --- a/mindspore/ccsrc/kernel/gpu/cuda_impl/unsorted_segment_sum.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unsorted_segment_sum.cuh @@ -18,7 +18,7 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_UNSORT_SEGMENT_SUM_H_ #include -#include "device/gpu/cuda_common.h" +#include "runtime/device/gpu/cuda_common.h" template void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0, size_t output_dim1, diff --git a/mindspore/ccsrc/kernel/gpu/data/dataset_init_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.cc similarity index 91% rename from mindspore/ccsrc/kernel/gpu/data/dataset_init_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.cc index 777310cebca..3c88b88c747 100644 --- a/mindspore/ccsrc/kernel/gpu/data/dataset_init_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "kernel/gpu/data/dataset_init_kernel.h" -#include "kernel/gpu/data/dataset_utils.h" -#include "device/gpu/gpu_buffer_mgr.h" -#include "device/gpu/gpu_memory_allocator.h" +#include "backend/kernel_compiler/gpu/data/dataset_init_kernel.h" +#include "backend/kernel_compiler/gpu/data/dataset_utils.h" +#include "runtime/device/gpu/gpu_buffer_mgr.h" +#include "runtime/device/gpu/gpu_memory_allocator.h" #include "utils/convert_utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/gpu/data/dataset_init_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/data/dataset_init_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.h index 318049f4ad5..f8cc9b19eaa 100644 --- a/mindspore/ccsrc/kernel/gpu/data/dataset_init_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.h @@ -19,8 +19,8 @@ #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/data/dataset_iterator_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_iterator_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/data/dataset_iterator_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_iterator_kernel.cc index 13ca191b0ba..67a487ce28e 100644 --- a/mindspore/ccsrc/kernel/gpu/data/dataset_iterator_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_iterator_kernel.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include "kernel/gpu/data/dataset_iterator_kernel.h" +#include "backend/kernel_compiler/gpu/data/dataset_iterator_kernel.h" #include #include #include -#include "device/gpu/gpu_buffer_mgr.h" -#include "device/gpu/gpu_common.h" -#include "kernel/gpu/data/dataset_utils.h" +#include "runtime/device/gpu/gpu_buffer_mgr.h" +#include "runtime/device/gpu/gpu_common.h" +#include "backend/kernel_compiler/gpu/data/dataset_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/data/dataset_iterator_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_iterator_kernel.h similarity index 93% rename from mindspore/ccsrc/kernel/gpu/data/dataset_iterator_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_iterator_kernel.h index cdd7a47e7ba..746aed3294b 100644 --- a/mindspore/ccsrc/kernel/gpu/data/dataset_iterator_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_iterator_kernel.h @@ -19,8 +19,8 @@ #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/data/dataset_utils.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_utils.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/data/dataset_utils.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_utils.cc index 846a63f84fd..cb014a3d2b1 100644 --- a/mindspore/ccsrc/kernel/gpu/data/dataset_utils.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_utils.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/data/dataset_utils.h" +#include "backend/kernel_compiler/gpu/data/dataset_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/data/dataset_utils.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_utils.h similarity index 100% rename from mindspore/ccsrc/kernel/gpu/data/dataset_utils.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_utils.h diff --git a/mindspore/ccsrc/kernel/gpu/gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h similarity index 91% rename from mindspore/ccsrc/kernel/gpu/gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h index c935798f065..4c179f2173c 100644 --- a/mindspore/ccsrc/kernel/gpu/gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h @@ -21,11 +21,11 @@ #include #include #include -#include "kernel/kernel.h" -#include "kernel/gpu/kernel_constants.h" -#include "device/gpu/gpu_device_manager.h" -#include "device/gpu/gpu_common.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" +#include "runtime/device/gpu/gpu_device_manager.h" +#include "runtime/device/gpu/gpu_common.h" +#include "backend/session/anf_runtime_algorithm.h" using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm; namespace mindspore { diff --git a/mindspore/ccsrc/kernel/gpu/gpu_kernel_factory.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.cc similarity index 95% rename from mindspore/ccsrc/kernel/gpu/gpu_kernel_factory.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.cc index b00b5c263dd..4a0191abd74 100644 --- a/mindspore/ccsrc/kernel/gpu/gpu_kernel_factory.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.cc @@ -14,15 +14,15 @@ * limitations under the License. 
*/ -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" #include #include #include "common/utils.h" -#include "device/kernel_info.h" -#include "device/gpu/cuda_common.h" -#include "kernel/common_utils.h" +#include "runtime/device/kernel_info.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/common_utils.h" namespace mindspore { namespace kernel { @@ -137,7 +137,7 @@ std::pair GpuKernelFactory::GpuKernelAttrCheck(const std::string & } GpuKernel *GpuKernelFactory::Create(const std::string &kernel_name, const CNodePtr &apply_kernel) { - auto kernel_info = apply_kernel->kernel_info(); + auto kernel_info = dynamic_cast(apply_kernel->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); const KernelBuildInfo *kernel_build_Info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(kernel_build_Info); diff --git a/mindspore/ccsrc/kernel/gpu/gpu_kernel_factory.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/gpu_kernel_factory.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.h index dc5f61a315a..8834fa0f1a6 100644 --- a/mindspore/ccsrc/kernel/gpu/gpu_kernel_factory.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.h @@ -21,9 +21,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "device/gpu/kernel_info_setter.h" -#include "kernel/kernel_build_info.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "runtime/device/gpu/kernel_info_setter.h" +#include "backend/kernel_compiler/kernel_build_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/kernel_constants.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/kernel_constants.h similarity index 100% rename from mindspore/ccsrc/kernel/gpu/kernel_constants.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/kernel_constants.h diff --git 
a/mindspore/ccsrc/kernel/gpu/math/addn_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/addn_gpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/gpu/math/addn_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/addn_gpu_kernel.cc index 4683f015ae7..86c7d8c1080 100644 --- a/mindspore/ccsrc/kernel/gpu/math/addn_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/addn_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/gpu/math/addn_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/math/addn_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/addn_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/addn_gpu_kernel.h similarity index 78% rename from mindspore/ccsrc/kernel/gpu/math/addn_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/addn_gpu_kernel.h index 1498da777f7..b69bd20216f 100644 --- a/mindspore/ccsrc/kernel/gpu/math/addn_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/addn_gpu_kernel.h @@ -19,9 +19,11 @@ #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { @@ -43,18 +45,26 @@ class AddNGpuFwdKernel : public GpuKernel { const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } bool Launch(const std::vector &inputs, const std::vector &, - const std::vector &outputs, void *) override { + const std::vector &outputs, void *stream_ptr) override { if (is_null_input_) { return true; } T *output_addr = 
GetDeviceAddress(outputs, 0); + if (cudnn_data_type_ == CUDNN_DATA_INT32) { + FillDeviceArray(outputs[0]->size / sizeof(T), output_addr, 0.0f, reinterpret_cast(stream_ptr)); + } const float alpha = 1; const float beta = 0; for (size_t i = 0; i < IntToSize(num_input_); i++) { T *input_addr = GetDeviceAddress(inputs, i); - CHECK_CUDNN_RET_WITH_EXCEPT(cudnnAddTensor(cudnn_handle_, &alpha, input_descriptor_, input_addr, - &(i > 0 ? alpha : beta), input_descriptor_, output_addr), - "cudnnAddTensor failed"); + if (cudnn_data_type_ == CUDNN_DATA_INT32) { + NoBroadcast(outputs[0]->size / sizeof(T), BROADCAST_TYPE_ADD, input_addr, output_addr, output_addr, + reinterpret_cast(stream_ptr)); + } else { + CHECK_CUDNN_RET_WITH_EXCEPT(cudnnAddTensor(cudnn_handle_, &alpha, input_descriptor_, input_addr, + &(i > 0 ? alpha : beta), input_descriptor_, output_addr), + "cudnnAddTensor failed"); + } } return true; } @@ -100,9 +110,8 @@ class AddNGpuFwdKernel : public GpuKernel { } void InitSizeLists() override { if (!is_null_input_) { - CHECK_CUDNN_RET_WITH_EXCEPT( - cudnnGetTensorSizeInBytes(input_descriptor_, reinterpret_cast(&input_size_)), - "cudnnGetTensorSizeInBytes failed"); + CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(input_descriptor_, &input_size_), + "cudnnGetTensorSizeInBytes failed"); } for (int i = 0; i < num_input_; i++) { input_size_list_.push_back(input_size_); diff --git a/mindspore/ccsrc/kernel/gpu/math/assign_add_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/assign_add_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/math/assign_add_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/assign_add_gpu_kernel.cc index 2ae1728ca34..bffcca158b5 100644 --- a/mindspore/ccsrc/kernel/gpu/math/assign_add_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/assign_add_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/math/assign_add_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/math/assign_add_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/assign_add_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/assign_add_gpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/math/assign_add_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/assign_add_gpu_kernel.h index db69fd7be64..04a74b34121 100644 --- a/mindspore/ccsrc/kernel/gpu/math/assign_add_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/assign_add_gpu_kernel.h @@ -19,9 +19,9 @@ #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/assign_add_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/assign_add_impl.cuh" namespace mindspore { namespace kernel { template diff --git a/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/bias_add_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/bias_add_gpu_kernel.cc index 5684f0c424a..a07fb6ddf64 100644 --- a/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/bias_add_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/math/bias_add_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/math/bias_add_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/bias_add_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/bias_add_gpu_kernel.h index 5a664db2e18..fd344be28ae 100644 --- a/mindspore/ccsrc/kernel/gpu/math/bias_add_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/bias_add_gpu_kernel.h @@ -21,9 +21,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.cc similarity index 98% rename from mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.cc index 96d51b704c3..41e7147328b 100644 --- a/mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/math/broadcast_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.h index be7d3a19d44..aaf827723a8 100644 --- a/mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.h @@ -21,10 +21,10 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/broadcast_impl.cuh" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cuh" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { template diff --git a/mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_grad_gpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_grad_gpu_kernel.cc index 85598cf9406..49be2fd9a6c 100644 --- a/mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_grad_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/math/broadcast_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/math/broadcast_grad_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_grad_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_grad_gpu_kernel.h index f1eb5fecf9e..6258c5c4e2f 100644 --- a/mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_grad_gpu_kernel.h @@ -21,10 +21,10 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/broadcast_grad_impl.cuh" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/broadcast_grad_impl.cuh" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { template diff --git a/mindspore/ccsrc/kernel/gpu/math/equalcount_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/equalcount_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/math/equalcount_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/equalcount_gpu_kernel.cc index f3c3b6164d1..3103f30f524 100644 --- a/mindspore/ccsrc/kernel/gpu/math/equalcount_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/equalcount_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/math/equalcount_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/math/equalcount_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/equalcount_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/equalcount_gpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/math/equalcount_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/equalcount_gpu_kernel.h index 7d3f74970fe..eae7a893b7d 100644 --- a/mindspore/ccsrc/kernel/gpu/math/equalcount_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/equalcount_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_EQUALCOUNT_GPU_KERNEL_H #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/equalcount_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/equalcount_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/float_status_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/float_status_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/math/float_status_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/float_status_gpu_kernel.cc index 374644eaf52..313669a6473 100644 --- a/mindspore/ccsrc/kernel/gpu/math/float_status_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/float_status_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/math/float_status_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/math/float_status_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/float_status_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/float_status_gpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/math/float_status_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/float_status_gpu_kernel.h index 1aa9b18684f..be74f2e9dc3 100644 --- a/mindspore/ccsrc/kernel/gpu/math/float_status_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/float_status_gpu_kernel.h @@ -21,9 +21,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/float_status_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/float_status_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/matmul_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/matmul_gpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/gpu/math/matmul_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/matmul_gpu_kernel.cc index 808d5998533..471c3945987 100644 --- a/mindspore/ccsrc/kernel/gpu/math/matmul_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/matmul_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/math/matmul_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/math/matmul_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/matmul_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/matmul_gpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/math/matmul_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/matmul_gpu_kernel.h index 3ee3493ed65..7888d442c92 100644 --- a/mindspore/ccsrc/kernel/gpu/math/matmul_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/matmul_gpu_kernel.h @@ -20,9 +20,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" #include "utils/convert_utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/random_op_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/random_op_gpu_kernel.cc new file mode 100644 index 00000000000..c72c271c522 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/random_op_gpu_kernel.cc @@ -0,0 +1,24 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/kernel_compiler/gpu/math/random_op_gpu_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_ONE(StandardNormal, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), + RandomOpGpuKernel, float) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/random_op_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/random_op_gpu_kernel.h new file mode 100644 index 00000000000..785ac02ee5a --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/random_op_gpu_kernel.h @@ -0,0 +1,121 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_RANDOMOP_GPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_RANDOMOP_GPU_KERNEL_H_ + +#include +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/random_op_impl.cuh" + +namespace mindspore { +namespace kernel { +enum RandomOptype { RANDOM_OP_NORMAL = 0, RANDOM_OP_INVALID_TYPE = 255 }; + +const std::map kRandomOpTypeMap = {{"StandardNormal", RANDOM_OP_NORMAL}}; +template +class RandomOpGpuKernel : public GpuKernel { + public: + RandomOpGpuKernel() + : random_op_type_(RANDOM_OP_INVALID_TYPE), + input_size_0_(0), + output_size_(sizeof(T)), + workspace_size_(sizeof(curandState)) {} + ~RandomOpGpuKernel() override = default; + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override { + void *workspace_addr = GetDeviceAddress(workspace, 0); + curandState *devStates = reinterpret_cast(workspace_addr); + T *output_addr = GetDeviceAddress(outputs, 0); + + switch (random_op_type_) { + case RANDOM_OP_NORMAL: { + StandardNormal(seed_, seed2_, devStates, output_addr, outputs[0]->size / sizeof(T), + reinterpret_cast(stream_ptr)); + break; + } + default: { + MS_LOG(EXCEPTION) << "Random operation " << random_op_type_ << " is not supported."; + } + } + return true; + } + bool Init(const CNodePtr &kernel_node) override { + std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); + auto iter = kRandomOpTypeMap.find(kernel_name); + if (iter == kRandomOpTypeMap.end()) { + MS_LOG(EXCEPTION) << "Random operation " << kernel_name << " is not supported."; + } else 
{ + random_op_type_ = iter->second; + } + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (input_num != 1) { + MS_LOG(ERROR) << "Input number is " << input_num << ", but random op needs 1 input."; + return false; + } + size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); + if (output_num != 1) { + MS_LOG(ERROR) << "Output number is " << output_num << ", but random op needs 1 output."; + return false; + } + auto input_shape_0 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + for (size_t i = 0; i < input_shape_0.size(); i++) { + input_size_0_ += input_shape_0[i]; + } + input_size_0_ *= sizeof(int); + auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); + for (size_t i = 0; i < output_shape.size(); i++) { + output_size_ *= output_shape[i]; + workspace_size_ *= output_shape[i]; + } + seed_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed")); + seed2_ = GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed2")); + InitSizeLists(); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(input_size_0_); + output_size_list_.push_back(output_size_); + workspace_size_list_.push_back(workspace_size_); + } + + private: + RandomOptype random_op_type_; + size_t input_size_0_; + size_t output_size_; + size_t workspace_size_; + int seed_; + int seed2_; + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_GPU_RANDOMOP_GPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/math/unary_op_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/unary_op_gpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/gpu/math/unary_op_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/unary_op_gpu_kernel.cc index 77f53fc4173..ae8e7bbd0b1 100644 --- a/mindspore/ccsrc/kernel/gpu/math/unary_op_gpu_kernel.cc 
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/unary_op_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/gpu/math/unary_op_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/math/unary_op_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/math/unary_op_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/unary_op_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/math/unary_op_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/math/unary_op_gpu_kernel.h index 4503b805f6b..26993bc3bd8 100644 --- a/mindspore/ccsrc/kernel/gpu/math/unary_op_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/unary_op_gpu_kernel.h @@ -21,9 +21,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/unary_op_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/unary_op_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nccl/nccl_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/nccl_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nccl/nccl_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/nccl_gpu_kernel.cc index 6993085a75b..c6e3c4c0432 100644 --- a/mindspore/ccsrc/kernel/gpu/nccl/nccl_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/nccl_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nccl/nccl_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nccl/nccl_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nccl/nccl_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/nccl_gpu_kernel.h similarity index 88% rename from mindspore/ccsrc/kernel/gpu/nccl/nccl_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/nccl_gpu_kernel.h index b5ab46a67d1..9701738bfc7 100644 --- a/mindspore/ccsrc/kernel/gpu/nccl/nccl_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nccl/nccl_gpu_kernel.h @@ -23,10 +23,10 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" -#include "device/gpu/distribution/collective_init.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" +#include "runtime/device/gpu/distribution/collective_init.h" namespace mindspore { namespace kernel { @@ -40,9 +40,11 @@ const std::map kNcclTypeMap = { static std::map kNcclDtypeMap = { {"kNumberTypeFloat32", ncclFloat}, {"kNumberTypeFloat16", ncclHalf}, {"kNumberTypeInt32", ncclInt}}; -typedef ncclResult_t (*AllReduce)(const void *, void *, size_t, ncclDataType_t, ncclRedOp_t, cudaStream_t); -typedef ncclResult_t (*AllGather)(const void *, void *, size_t, ncclDataType_t, cudaStream_t); -typedef ncclResult_t (*ReduceScatter)(const void *, void *, size_t, ncclDataType_t, ncclRedOp_t, cudaStream_t); +typedef ncclResult_t (*AllReduce)(const void *, void *, size_t, ncclDataType_t, ncclRedOp_t, cudaStream_t, + const std::string &); +typedef ncclResult_t (*AllGather)(const void *, void *, size_t, ncclDataType_t, cudaStream_t, const std::string &); +typedef ncclResult_t (*ReduceScatter)(const void *, void *, size_t, ncclDataType_t, ncclRedOp_t, cudaStream_t, + const std::string &); 
template class NcclGpuKernel : public GpuKernel { @@ -50,6 +52,7 @@ class NcclGpuKernel : public GpuKernel { NcclGpuKernel() : nccl_kernel_type_(NCCL_INVALID_TYPE), nccl_reduce_type_(ncclSum), + group_name_(""), input_size_(0), output_size_(0), collective_handle_(nullptr), @@ -71,7 +74,7 @@ class NcclGpuKernel : public GpuKernel { reinterpret_cast(dlsym(const_cast(collective_handle_), "AllReduce")); MS_EXCEPTION_IF_NULL(all_reduce_funcptr); CHECK_NCCL_RET_WITH_EXCEPT((*all_reduce_funcptr)(input_addr, output_addr, output_size_ / sizeof(T), - nccl_data_type_, nccl_reduce_type_, stream), + nccl_data_type_, nccl_reduce_type_, stream, group_name_), "ncclAllReduce failed"); break; } @@ -80,7 +83,7 @@ class NcclGpuKernel : public GpuKernel { reinterpret_cast(dlsym(const_cast(collective_handle_), "AllGather")); MS_EXCEPTION_IF_NULL(all_gather_funcptr); CHECK_NCCL_RET_WITH_EXCEPT( - (*all_gather_funcptr)(input_addr, output_addr, input_size_ / sizeof(T), nccl_data_type_, stream), + (*all_gather_funcptr)(input_addr, output_addr, input_size_ / sizeof(T), nccl_data_type_, stream, group_name_), "ncclAllGather failed"); break; } @@ -89,7 +92,7 @@ class NcclGpuKernel : public GpuKernel { reinterpret_cast(dlsym(const_cast(collective_handle_), "ReduceScatter")); MS_EXCEPTION_IF_NULL(reduce_scatter_funcptr); CHECK_NCCL_RET_WITH_EXCEPT((*reduce_scatter_funcptr)(input_addr, output_addr, output_size_ / sizeof(T), - nccl_data_type_, nccl_reduce_type_, stream), + nccl_data_type_, nccl_reduce_type_, stream, group_name_), "ncclReduceScatter failed"); break; } @@ -121,15 +124,18 @@ class NcclGpuKernel : public GpuKernel { output_size_list_.push_back(size); output_size_ += size; } - InferCommType(kernel_node); - collective_handle_ = device::gpu::CollectiveInitializer::instance().collective_handle(); - MS_EXCEPTION_IF_NULL(collective_handle_); + InferCommType(kernel_node); + group_name_ = GetAttr(kernel_node, kAttrGroup); + MS_LOG(INFO) << AnfAlgo::GetCNodeName(kernel_node) << " for group " << 
group_name_; auto comm_stream_attr = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stream_id"); if (comm_stream_attr) { comm_stream_ = reinterpret_cast(GetValue(comm_stream_attr)); MS_EXCEPTION_IF_NULL(comm_stream_); } + + collective_handle_ = device::gpu::CollectiveInitializer::instance().collective_handle(); + MS_EXCEPTION_IF_NULL(collective_handle_); return true; } @@ -146,7 +152,7 @@ class NcclGpuKernel : public GpuKernel { nccl_kernel_type_ = iter->second; } - auto reduce_op = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("op"); + auto reduce_op = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(kAttrOp); if (reduce_op) { std::string type = GetValue(reduce_op); if (type == "sum") { @@ -167,6 +173,7 @@ class NcclGpuKernel : public GpuKernel { NcclKernelType nccl_kernel_type_; ncclRedOp_t nccl_reduce_type_; ncclDataType_t nccl_data_type_; + std::string group_name_; size_t input_size_; size_t output_size_; std::vector input_size_list_; diff --git a/mindspore/ccsrc/kernel/gpu/nn/activation_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nn/activation_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_gpu_kernel.cc index 5e80cccd75a..334550b2135 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/activation_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/activation_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/activation_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/activation_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/activation_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_gpu_kernel.h index bf6cfa7b23c..d651da75e0b 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/activation_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_gpu_kernel.h @@ -20,9 +20,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/activation_grad_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_grad_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nn/activation_grad_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_grad_kernel.cc index 35d11f8b471..8fd486c08cb 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/activation_grad_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_grad_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/activation_grad_kernel.h" +#include "backend/kernel_compiler/gpu/nn/activation_grad_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/activation_grad_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_grad_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/activation_grad_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_grad_kernel.h index 38e34eb7525..ffdb6180987 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/activation_grad_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/activation_grad_kernel.h @@ -20,9 +20,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/adam_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/adam_gpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/adam_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/adam_gpu_kernel.cc index 049a5cc2807..0f89eb44196 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/adam_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/adam_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/adam_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/adam_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/adam_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/adam_gpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nn/adam_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/adam_gpu_kernel.h index 93c6381ab34..e2fc87ed510 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/adam_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/adam_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_ADAM_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/adam_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/adam_impl.cuh" namespace mindspore { namespace kernel { template diff --git a/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.cc similarity index 93% rename from mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.cc index ce6c9beeb7a..6131aa85686 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/bias_add_grad_gpu_kenel.h" +#include "backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.h index 9b4f18d24c7..3e15b818be3 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/bias_add_grad_gpu_kenel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.h @@ -23,9 +23,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.cc index df6825e0793..f9bb710b943 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/conv2d_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h similarity index 98% rename from mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h index f51cbfef337..6072614e22d 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h @@ -20,10 +20,10 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/pad_impl.cuh" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/pad_impl.cuh" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.cc index 28e9a10ccc2..ca16e1a18c8 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h similarity index 98% rename from mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h index 0d7be25772a..638da4a99f9 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h @@ -20,10 +20,10 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/pad_impl.cuh" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/pad_impl.cuh" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.cc index 12b6f91537e..d8441fb67c7 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h similarity index 98% rename from mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h index a33ea5b4da4..a9a1e5c0ccb 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h @@ -20,10 +20,10 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/pad_impl.cuh" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/pad_impl.cuh" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ctcloss_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ctcloss_gpu_kernel.cc new file mode 100644 index 00000000000..155451875c4 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ctcloss_gpu_kernel.cc @@ -0,0 +1,32 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/gpu/nn/ctcloss_gpu_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_ONE(CTCLossV2, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + CtcLossGpuKernel, float) + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ctcloss_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ctcloss_gpu_kernel.h new file mode 100644 index 00000000000..8b023545160 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ctcloss_gpu_kernel.h @@ -0,0 +1,166 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_NN_CTCLOSS_GPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_NN_CTCLOSS_GPU_KERNEL_H_ + +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/gpu_memory_allocator.h" + +namespace mindspore { +namespace kernel { +template +class CtcLossGpuKernel : public GpuKernel { + public: + CtcLossGpuKernel() + : cudnn_handle_(nullptr), + probs_desc_(nullptr), + ctcloss_desc_(nullptr), + label_size_(0), + input_lengths_size_(0), + label_lengths_size_(0) {} + ~CtcLossGpuKernel() override { DestroyResource(); } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + float *probs = GetDeviceAddress(inputs, 0); + int *labels = GetDeviceAddress(inputs, 1); + int *input_lengths = GetDeviceAddress(inputs, 2); + int *label_lengths = GetDeviceAddress(inputs, 3); + float *costs = GetDeviceAddress(outputs, 0); + float *grads = GetDeviceAddress(outputs, 1); + + // Copy labels/input_lengths/label_length to host as cudnn7.x.x requires + void *labels_host = nullptr; + void *input_lengths_host = nullptr; + void *label_lengths_host = nullptr; + CHECK_CUDA_RET_WITH_EXCEPT(cudaMallocHost(&labels_host, inputs[1]->size), "cudaMallocHost failed."); + CHECK_CUDA_RET_WITH_EXCEPT(cudaMallocHost(&input_lengths_host, inputs[2]->size), "cudaMallocHost failed."); + CHECK_CUDA_RET_WITH_EXCEPT(cudaMallocHost(&label_lengths_host, inputs[3]->size), "cudaMallocHost failed."); + cudaStream_t stream = reinterpret_cast(stream_ptr); + CHECK_CUDA_RET_WITH_EXCEPT(cudaMemcpyAsync(labels_host, labels, inputs[1]->size, cudaMemcpyDeviceToHost, 
stream), + "cudaMemcpyAsync failed."); + CHECK_CUDA_RET_WITH_EXCEPT( + cudaMemcpyAsync(input_lengths_host, input_lengths, inputs[2]->size, cudaMemcpyDeviceToHost, stream), + "cudaMemcpyAsync failed."); + CHECK_CUDA_RET_WITH_EXCEPT( + cudaMemcpyAsync(label_lengths_host, label_lengths, inputs[3]->size, cudaMemcpyDeviceToHost, stream), + "cudaMemcpyAsync failed."); + + CHECK_CUDA_RET_WITH_EXCEPT(cudaStreamSynchronize(stream), "cudaStreamSynchronize failed."); + size_t workspace_size = 0; + CHECK_CUDNN_RET_WITH_EXCEPT( + cudnnGetCTCLossWorkspaceSize(cudnn_handle_, probs_desc_, probs_desc_, reinterpret_cast(labels_host), + reinterpret_cast(label_lengths_host), + reinterpret_cast(input_lengths_host), CUDNN_CTC_LOSS_ALGO_DETERMINISTIC, + ctcloss_desc_, &workspace_size), + "cudnnGetCTCLossWorkspaceSize failed."); + void *workspace = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(workspace_size); + if (workspace == nullptr) { + MS_LOG(EXCEPTION) << "Failed to alloc workspace, size: " << workspace_size; + } + + CHECK_CUDNN_RET_WITH_EXCEPT( + cudnnCTCLoss(cudnn_handle_, probs_desc_, probs, reinterpret_cast(labels_host), + reinterpret_cast(label_lengths_host), reinterpret_cast(input_lengths_host), costs, + probs_desc_, grads, CUDNN_CTC_LOSS_ALGO_DETERMINISTIC, ctcloss_desc_, workspace, workspace_size), + "cudnnCtcLoss failed."); + CHECK_CUDA_RET_WITH_EXCEPT(cudaStreamSynchronize(stream), "cudaStreamSynchronize failed."); + + device::gpu::GPUMemoryAllocator::GetInstance().FreeTensorMem(workspace); + CHECK_CUDA_RET_WITH_EXCEPT(cudaFreeHost(label_lengths_host), "cudaFreeHost failed."); + CHECK_CUDA_RET_WITH_EXCEPT(cudaFreeHost(input_lengths_host), "cudaFreeHost failed."); + CHECK_CUDA_RET_WITH_EXCEPT(cudaFreeHost(labels_host), "cudaFreeHost failed."); + return true; + } + bool Init(const CNodePtr &kernel_node) override { + InitResource(); + auto probs_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + if (probs_shape.size() != 3) { + MS_LOG(EXCEPTION) 
<< "probs dims: " << probs_shape.size() << " not support."; + } + probs_dims_[0] = probs_shape[0]; + probs_dims_[1] = probs_shape[1]; + probs_dims_[2] = probs_shape[2]; + + auto labels_dims = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + if (labels_dims.size() != 1 && labels_dims.size() != 2) { + MS_LOG(EXCEPTION) << "labels dims: " << labels_dims.size() << " not support."; + } + label_size_ = sizeof(int); + for (auto i : labels_dims) { + label_size_ *= i; + } + + auto input_length_dims = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + input_lengths_size_ = input_length_dims[0] * sizeof(int); + auto label_length_dims = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + label_lengths_size_ = label_length_dims[0] * sizeof(int); + CHECK_CUDNN_RET_WITH_EXCEPT( + cudnnSetTensorNdDescriptorEx(probs_desc_, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 3, probs_dims_), + "cudnnSetTensorNdDescriptorEx failed."); + CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetCTCLossDescriptorEx(ctcloss_desc_, CUDNN_DATA_FLOAT, + CUDNN_LOSS_NORMALIZATION_SOFTMAX, CUDNN_PROPAGATE_NAN), + "cudnnSetCTCLossDescriptorEx failed."); + InitSizeLists(); + return true; + } + + protected: + void InitResource() override { + cudnn_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle(); + CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateTensorDescriptor(&probs_desc_), "cudnnCreateTensorDescriptor failed."); + CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateCTCLossDescriptor(&ctcloss_desc_), "cudnnCreateCTCLossDescriptor failed."); + } + + void InitSizeLists() override { + input_size_list_.push_back(probs_dims_[0] * probs_dims_[1] * probs_dims_[2] * sizeof(float)); + input_size_list_.push_back(label_size_); + input_size_list_.push_back(input_lengths_size_); + input_size_list_.push_back(label_lengths_size_); + + output_size_list_.push_back(probs_dims_[1] * sizeof(float)); + output_size_list_.push_back(probs_dims_[0] * probs_dims_[1] * probs_dims_[2] * sizeof(float)); + } + + private: + void 
DestroyResource() noexcept { + CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyCTCLossDescriptor(ctcloss_desc_), "cudnnDestroyCTCLossDescriptor failed."); + CHECK_CUDNN_RET_WITH_ERROR(cudnnDestroyTensorDescriptor(probs_desc_), "cudnnDestroyTensorDescriptor failed."); + } + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + + cudnnHandle_t cudnn_handle_; + cudnnTensorDescriptor_t probs_desc_; + cudnnCTCLossDescriptor_t ctcloss_desc_; + int probs_dims_[3] = {0}; + int label_size_; + int input_lengths_size_; + int label_lengths_size_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_GPU_NN_CTCLOSS_GPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/nn/dropout_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/nn/dropout_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_gpu_kernel.cc index 459010e9e99..423a230b6e1 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/dropout_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/dropout_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/dropout_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/dropout_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/nn/dropout_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_gpu_kernel.h index 4dfacb7ca13..2104d7af35e 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/dropout_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_DROPOUT_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/dropout_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/dropout_impl.cuh" #include "include/curand.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/gpu/nn/dropout_grad_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_grad_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/nn/dropout_grad_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_grad_kernel.cc index 2fd21c96ee1..faf884c2eba 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/dropout_grad_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_grad_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/dropout_grad_kernel.h" +#include "backend/kernel_compiler/gpu/nn/dropout_grad_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/dropout_grad_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_grad_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/nn/dropout_grad_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_grad_kernel.h index e6683e15dd0..a3a7250c9b6 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/dropout_grad_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/dropout_grad_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_DROPOUT_GRAD_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/dropout_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/dropout_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/flatten_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_gpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/flatten_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_gpu_kernel.cc index f9c993d31d1..d8206aedcde 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/flatten_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/flatten_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/flatten_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/flatten_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/nn/flatten_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_gpu_kernel.h index 3b0ad8c9465..a140579a3c3 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/flatten_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_gpu_kernel.h @@ -19,8 +19,8 @@ #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/flatten_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_grad_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/nn/flatten_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_grad_gpu_kernel.cc index 0e079d137bc..c07126a2edb 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/flatten_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_grad_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/flatten_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/flatten_grad_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/flatten_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_grad_gpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nn/flatten_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_grad_gpu_kernel.h index 0748dc77db1..b21327bc3bf 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/flatten_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/flatten_grad_gpu_kernel.h @@ -19,8 +19,8 @@ #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/ftrl_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ftrl_gpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/ftrl_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ftrl_gpu_kernel.cc index 4d30130931c..01861537456 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/ftrl_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ftrl_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/ftrl_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/ftrl_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/ftrl_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ftrl_gpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nn/ftrl_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ftrl_gpu_kernel.h index 9e2153965b9..ea08741dbaf 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/ftrl_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/ftrl_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_FTRL_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/ftrl_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/ftrl_impl.cuh" namespace mindspore { namespace kernel { template diff --git a/mindspore/ccsrc/kernel/gpu/nn/fused_adam_weight_decay.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_adam_weight_decay.cc similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/fused_adam_weight_decay.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_adam_weight_decay.cc index 77cb7f86086..5ef2fd87869 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/fused_adam_weight_decay.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_adam_weight_decay.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/fused_adam_weight_decay.h" +#include "backend/kernel_compiler/gpu/nn/fused_adam_weight_decay.h" namespace mindspore { namespace kernel { @@ -47,6 +47,5 @@ MS_REG_GPU_KERNEL_ONE(FusedAdam, .AddInputAttr(kNumberTypeFloat32) .AddOutputAttr(kNumberTypeFloat32), FusedAdamWeightDecayGpuKernel, float) - } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/gpu/nn/fused_adam_weight_decay.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_adam_weight_decay.h similarity index 93% rename from mindspore/ccsrc/kernel/gpu/nn/fused_adam_weight_decay.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_adam_weight_decay.h index f13f6ed59fb..c4fd31a7374 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/fused_adam_weight_decay.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_adam_weight_decay.h @@ -18,10 +18,10 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_FUSED_ADAM_WEIGHT_DECAY_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" -#include "kernel/gpu/cuda_impl/adam_weight_decay_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/cuda_impl/adam_weight_decay_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batch_norm_gpu_kernel.cc similarity index 98% rename from mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batch_norm_gpu_kernel.cc index 91747d24d8e..2ce39b63a02 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batch_norm_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations 
under the License. */ -#include "kernel/gpu/nn/fused_batch_norm_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/fused_batch_norm_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batch_norm_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batch_norm_gpu_kernel.h index b0a898209be..774428dc409 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batch_norm_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_FUSED_BATCH_NORM_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batchnorm_grad_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batchnorm_grad_gpu_kernel.cc index 3947aaea9ad..546e034f6bf 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batchnorm_grad_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/fused_batchnorm_grad_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batchnorm_grad_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batchnorm_grad_gpu_kernel.h index 712354b17cb..a2d0d741b13 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/fused_batchnorm_grad_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_FUSED_BATCHNORM_GRAD_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/gelu_grad_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_grad_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/gpu/nn/gelu_grad_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_grad_kernel.cc index 32d91be80a6..274e4896c98 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/gelu_grad_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_grad_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/gelu_grad_kernel.h" +#include "backend/kernel_compiler/gpu/nn/gelu_grad_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/gelu_grad_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_grad_kernel.h similarity index 91% rename from mindspore/ccsrc/kernel/gpu/nn/gelu_grad_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_grad_kernel.h index 64153490129..823da1fe9f0 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/gelu_grad_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_grad_kernel.h @@ -18,10 +18,10 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_GELU_GRAD_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" -#include "kernel/gpu/cuda_impl/gelu_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/cuda_impl/gelu_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/gelu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/nn/gelu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_kernel.cc index ca54ff68ad8..03cd9a155bf 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/gelu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/gelu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/gelu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/gelu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_kernel.h similarity index 90% rename from mindspore/ccsrc/kernel/gpu/nn/gelu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_kernel.h index 60968d109b1..76d3861d55c 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/gelu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/gelu_kernel.h @@ -18,10 +18,10 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_GELU_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" -#include "kernel/gpu/cuda_impl/gelu_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/cuda_impl/gelu_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_gpu_kernel.cc index 19e4dc17a67..49f556ae642 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/layer_norm_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/layer_norm_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_gpu_kernel.h index d5ec3ff8f25..74669e03de2 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_LAYER_NORM_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/layer_norm_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/layer_norm_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_grad_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_grad_gpu_kernel.cc index 7991d424991..b59f95b8a2c 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_grad_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/layer_norm_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/layer_norm_grad_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_grad_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_grad_gpu_kernel.h index 83bdedb9b35..93967adad32 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/layer_norm_grad_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_LAYER_NORM_GRAD_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/layer_norm_grad_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/layer_norm_grad_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.cc index c745c216f7b..a24aaeeb965 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/lstm_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.h similarity index 98% rename from mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.h index 42eda96b02f..ad3e588f00e 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.h @@ -20,9 +20,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.cc index ab88308d4e9..1fa47690b35 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/lstm_grad_data_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.h similarity index 98% rename from mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.h index 6eeefa262c4..6d6bed55555 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.h @@ -20,9 +20,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_weight_gpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_weight_gpu_kernel.cc index 856a986e073..9ec239491f8 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_weight_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/lstm_grad_weight_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_weight_gpu_kernel.h similarity index 98% rename from mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_weight_gpu_kernel.h index a1a4852c842..445d2ce1997 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_weight_gpu_kernel.h @@ -20,9 +20,9 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { template diff --git a/mindspore/ccsrc/kernel/gpu/nn/momentum_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/momentum_gpu_kernel.cc similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/momentum_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/momentum_gpu_kernel.cc index e8b2b177068..99ae2affe8c 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/momentum_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/momentum_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/momentum_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/momentum_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/momentum_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/momentum_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/nn/momentum_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/momentum_gpu_kernel.h index 5abfb9e97b7..32d3fbb079b 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/momentum_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/momentum_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_MOMENTUM_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/momentum_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/momentum_impl.cuh" namespace mindspore { namespace kernel { template diff --git a/mindspore/ccsrc/kernel/gpu/nn/pooling_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_gpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/gpu/nn/pooling_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_gpu_kernel.cc index e871af360a1..902b0d9faf5 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/pooling_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/pooling_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/pooling_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/pooling_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/pooling_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_gpu_kernel.h index 0dda1e89980..908a4e9b998 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/pooling_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_gpu_kernel.h @@ -20,10 +20,10 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/pad_impl.cuh" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/pad_impl.cuh" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/pooling_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nn/pooling_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.cc index c3d4a449430..2948c900d24 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/pooling_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/pooling_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/pooling_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.h similarity index 98% rename from mindspore/ccsrc/kernel/gpu/nn/pooling_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.h index e8f1ebc1af7..a066eacfa08 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/pooling_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.h @@ -20,10 +20,10 @@ #include #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/pad_impl.cuh" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/pad_impl.cuh" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/rmsprop_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/rmsprop_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nn/rmsprop_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/rmsprop_gpu_kernel.cc index 032e8eeec4b..c33909a82b3 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/rmsprop_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/rmsprop_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/rmsprop_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/rmsprop_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/rmsprop_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/rmsprop_gpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/nn/rmsprop_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/rmsprop_gpu_kernel.h index 9e148b690d0..9811c71094e 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/rmsprop_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/rmsprop_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_RMSPROP_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/rmsprop_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/rmsprop_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.cc similarity index 91% rename from mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.cc index 1e650811fdc..96d2d295495 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h index 8d0efe90b48..a2d3aabb688 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.cc similarity index 92% rename from mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.cc index dabc4df850e..05c9a4234ba 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.cc +++ 
b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h index 01f416f6b78..88ab46a6ba5 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sigmoid_cross_entropy_with_logits_grad_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_GRAD_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sigmoid_cross_entropy_with_logits_grad_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_gpu_kernel.cc new file mode 100644 index 00000000000..ea40bea6a4d --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_gpu_kernel.cc @@ -0,0 +1,26 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/gpu/nn/smooth_l1_loss_gpu_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_ONE( + SmoothL1Loss, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + SmoothL1LossGpuKernel, float) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_gpu_kernel.h new file mode 100644 index 00000000000..dc20f75077d --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_gpu_kernel.h @@ -0,0 +1,75 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_NN_SMOOTH_L1_LOSS_GPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_NN_SMOOTH_L1_LOSS_GPU_KERNEL_H_ + +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/smooth_l1_loss_impl.cuh" +namespace mindspore { +namespace kernel { +template +class SmoothL1LossGpuKernel : public GpuKernel { + public: + SmoothL1LossGpuKernel() : input_size_(1), sigma_(1.0) {} + ~SmoothL1LossGpuKernel() override = default; + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + T *prediction = GetDeviceAddress(inputs, 0); + T *target = GetDeviceAddress(inputs, 1); + T *loss = GetDeviceAddress(outputs, 0); + + SmoothL1Loss(input_size_, sigma_, prediction, target, loss, reinterpret_cast(stream_ptr)); + return true; + } + + bool Init(const CNodePtr &kernel_node) override { + auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + for (size_t i = 0; i < input_shape.size(); i++) { + input_size_ *= input_shape[i]; + } + + sigma_ = GetAttr(kernel_node, "sigma"); + InitSizeLists(); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(input_size_ * sizeof(T)); + input_size_list_.push_back(input_size_ * sizeof(T)); + output_size_list_.push_back(input_size_ * sizeof(T)); + } + + private: + size_t input_size_; + float sigma_; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_GPU_NN_SMOOTH_L1_LOSS_GPU_KERNEL_H_ diff 
--git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_grad_gpu_kernel.cc new file mode 100644 index 00000000000..8a4fb38460a --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_grad_gpu_kernel.cc @@ -0,0 +1,29 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/gpu/nn/smooth_l1_loss_grad_gpu_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_ONE(SmoothL1LossGrad, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + SmoothL1LossGradGpuKernel, float) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_grad_gpu_kernel.h new file mode 100644 index 00000000000..02be336932f --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/smooth_l1_loss_grad_gpu_kernel.h @@ -0,0 +1,76 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_KERNEL_GPU_NN_SMOOTH_L1_LOSS_GRAD_GPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_GPU_NN_SMOOTH_L1_LOSS_GRAD_GPU_KERNEL_H_ + +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/smooth_l1_loss_impl.cuh" +namespace mindspore { +namespace kernel { +template +class SmoothL1LossGradGpuKernel : public GpuKernel { + public: + SmoothL1LossGradGpuKernel() : input_size_(1), sigma_(1.0) {} + ~SmoothL1LossGradGpuKernel() override = default; + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) override { + T *prediction = GetDeviceAddress(inputs, 0); + T *target = GetDeviceAddress(inputs, 1); + T *dloss = GetDeviceAddress(inputs, 2); + T *dx = GetDeviceAddress(outputs, 0); + + SmoothL1LossGrad(input_size_, sigma_, prediction, target, dloss, dx, reinterpret_cast(stream_ptr)); + return true; + } + + bool Init(const CNodePtr &kernel_node) override { + auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + for (size_t i = 0; i < input_shape.size(); i++) { + input_size_ *= input_shape[i]; + } + + sigma_ = GetAttr(kernel_node, "sigma"); + InitSizeLists(); + return true; + } + + 
protected: + void InitSizeLists() override { + input_size_list_.push_back(input_size_ * sizeof(T)); + input_size_list_.push_back(input_size_ * sizeof(T)); + output_size_list_.push_back(input_size_ * sizeof(T)); + } + + private: + size_t input_size_; + float sigma_; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_GPU_NN_SMOOTH_L1_LOSS_GRAD_GPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.cc similarity index 92% rename from mindspore/ccsrc/kernel/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.cc index 160a26d2007..8a64762c0a1 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.h index 8256174bcba..e56cb96fd75 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_cross_entropy_with_logits_gpu_kernel.h @@ -19,10 +19,10 @@ #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/cross_entropy_impl.cuh" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/cross_entropy_impl.cuh" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/softmax_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_gpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/gpu/nn/softmax_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_gpu_kernel.cc index b9667ed85ba..24c2c126016 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/softmax_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/softmax_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/softmax_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/softmax_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/nn/softmax_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_gpu_kernel.h index 9d5a2a24e14..279bac3aa9e 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/softmax_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_gpu_kernel.h @@ -18,10 +18,10 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_SOFTMAX_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" -#include "kernel/gpu/cuda_impl/transpose_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/softmax_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_grad_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/nn/softmax_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_grad_gpu_kernel.cc index 5b07136522e..bd20413d08f 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/softmax_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_grad_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/softmax_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/softmax_grad_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/softmax_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_grad_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/softmax_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_grad_gpu_kernel.h index d73503d5a59..b814be9969b 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/softmax_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/softmax_grad_gpu_kernel.h @@ -18,10 +18,10 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_SOFTMAX_GRAD_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" -#include "kernel/gpu/cuda_impl/transpose_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/cuda_impl/transpose_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.cc similarity index 92% rename from mindspore/ccsrc/kernel/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.cc index 537eeb5726a..81b46f520c6 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h index 6950f0e3086..bcb8a6b333d 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/sparse_softmax_cross_entropy_with_logits_gpu_kernel.h @@ -19,10 +19,10 @@ #include #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/cross_entropy_impl.cuh" -#include "kernel/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/cross_entropy_impl.cuh" +#include "backend/kernel_compiler/gpu/kernel_constants.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/other/assign_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/other/assign_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.cc index 0f3e0c95f49..4e07463a6c7 100644 --- a/mindspore/ccsrc/kernel/gpu/other/assign_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/other/assign_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/other/assign_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/other/assign_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/other/assign_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.h index b41d583a430..76e863393c0 100644 --- a/mindspore/ccsrc/kernel/gpu/other/assign_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_ASSIGN_GPU_KERNEL_H #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_gpu_kernel.cc index af957674076..92652f67f9c 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/batchnorm_fold2_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/quant/batchnorm_fold2_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_gpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_gpu_kernel.h index b898f34689b..83600e20df6 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_BATCHNORMFOLD2_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/batchnorm_fold2_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold2_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_grad_gpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_grad_gpu_kernel.cc index 93862aeeddf..6fc080713af 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_grad_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h index e0bafdb96a1..33352109250 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold2_grad_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_NN_BATCHNORMFOLD2_GRAD_GPU_KERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/batchnorm_fold2_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold2_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_gpu_kernel.cc index 4f968a0fa32..95349c84aaa 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/batchnorm_fold_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/quant/batchnorm_fold_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_gpu_kernel.h similarity index 97% rename from mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_gpu_kernel.h index 6cd001fd2eb..11b150686c3 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_gpu_kernel.h @@ -18,10 +18,10 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_BATCHNORM_FOLD_GPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/kernel_constants.h" -#include "kernel/gpu/cuda_impl/batchnorm_fold_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/kernel_constants.h" +#include "backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_grad_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_grad_gpu_kernel.cc index 93ea66258d8..b727c6c7df2 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_grad_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/batchnorm_fold_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/quant/batchnorm_fold_grad_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_grad_gpu_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_grad_gpu_kernel.h index 7a3ed7ef91b..93a3cbf46eb 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/batchnorm_fold_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/batchnorm_fold_grad_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_BATCHNORM_FOLD_GRAD_GPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/batchnorm_fold_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/batchnorm_fold_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/correction_mul_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_gpu_kernel.cc similarity index 93% rename from mindspore/ccsrc/kernel/gpu/quant/correction_mul_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_gpu_kernel.cc index a914b6ec14f..9af5451c531 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/correction_mul_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_gpu_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/correction_mul_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/quant/correction_mul_gpu_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/correction_mul_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_gpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/quant/correction_mul_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_gpu_kernel.h index 29aeabb03a1..4ba6285e4b2 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/correction_mul_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_CORRECTIONMUL_GPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/correction_mul_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/correction_mul_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/correction_mul_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_grad_gpu_kernel.cc similarity index 88% rename from mindspore/ccsrc/kernel/gpu/quant/correction_mul_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_grad_gpu_kernel.cc index 28b5d56e684..63a47bc452b 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/correction_mul_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_grad_gpu_kernel.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/correction_mul_grad_gpu_kernel.h" -#include "kernel/gpu/cuda_impl/correction_mul_impl.cuh" +#include "backend/kernel_compiler/gpu/quant/correction_mul_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/cuda_impl/correction_mul_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/correction_mul_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_grad_gpu_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/gpu/quant/correction_mul_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_grad_gpu_kernel.h index 3feffa586b2..b9fcbf0787d 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/correction_mul_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/correction_mul_grad_gpu_kernel.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_CORRECTIONMULGRAD_GPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/gpu/cuda_impl/correction_mul_impl.cuh" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/correction_mul_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_gpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_gpu_kernel.cc index 8db6ddd8487..8a43ce09410 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_gpu_kernel.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/fake_quant_perchannel_gpu_kernel.h" -#include "kernel/gpu/cuda_impl/fake_quant_perchannel_impl.cuh" +#include "backend/kernel_compiler/gpu/quant/fake_quant_perchannel_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/cuda_impl/fake_quant_perchannel_impl.cuh" #include #include #include diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_gpu_kernel.h similarity index 92% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_gpu_kernel.h index 122fe96af32..8e2c9524b2f 100755 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_gpu_kernel.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_FAKEQUANT_PER_CHANNEL_GPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.cc index 5c774c05edd..598a6a960d1 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.h" -#include "kernel/gpu/cuda_impl/fake_quant_perchannel_impl.cuh" +#include "backend/kernel_compiler/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/cuda_impl/fake_quant_perchannel_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.h similarity index 91% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.h index d863a2c99f5..c2611ab8a25 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perchannel_grad_gpu_kernel.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_FAKEQUANT_PER_CHANNEL_GRAD_GPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_gpu_kernel.cc similarity index 95% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_gpu_kernel.cc index 44869983eb8..24edec97a91 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_gpu_kernel.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/fake_quant_perlayer_gpu_kernel.h" -#include "kernel/gpu/cuda_impl/fake_quant_perlayer_impl.cuh" +#include "backend/kernel_compiler/gpu/quant/fake_quant_perlayer_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/cuda_impl/fake_quant_perlayer_impl.cuh" #include #include #include diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_gpu_kernel.h similarity index 91% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_gpu_kernel.h index 38810e06dfb..6df4da31043 100755 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_gpu_kernel.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_FAKEQUANT_PERLAYER_GPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.cc similarity index 94% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.cc index c8d57b2bb1f..f96b6a48d22 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.h" -#include "kernel/gpu/cuda_impl/fake_quant_perlayer_impl.cuh" +#include "backend/kernel_compiler/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/cuda_impl/fake_quant_perlayer_impl.cuh" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.h similarity index 91% rename from mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.h index ae2ea5bfacc..475723f684c 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/fake_quant_perlayer_grad_gpu_kernel.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_FAKEQUANT_PERLAYER_GRAD_GPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perchannel_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perchannel_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/quant/minmax_update_perchannel_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perchannel_gpu_kernel.cc index a8ce72148b2..742a9b8c55c 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perchannel_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perchannel_gpu_kernel.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/minmax_update_perchannel_gpu_kernel.h" -#include "kernel/gpu/cuda_impl/minmax_update_impl.cuh" +#include "backend/kernel_compiler/gpu/quant/minmax_update_perchannel_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/cuda_impl/minmax_update_impl.cuh" #include #include #include diff --git a/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perchannel_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perchannel_gpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/quant/minmax_update_perchannel_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perchannel_gpu_kernel.h index 563a583ca1d..9a0fe23e6af 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perchannel_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perchannel_gpu_kernel.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_MINMAX_UPDATE_PERCHANNEL_GPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perlayer_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perlayer_gpu_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/gpu/quant/minmax_update_perlayer_gpu_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perlayer_gpu_kernel.cc index 3659665b23c..8f11e907e17 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perlayer_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perlayer_gpu_kernel.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "kernel/gpu/quant/minmax_update_perlayer_gpu_kernel.h" -#include "kernel/gpu/cuda_impl/minmax_update_impl.cuh" +#include "backend/kernel_compiler/gpu/quant/minmax_update_perlayer_gpu_kernel.h" +#include "backend/kernel_compiler/gpu/cuda_impl/minmax_update_impl.cuh" #include #include #include diff --git a/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perlayer_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perlayer_gpu_kernel.h similarity index 94% rename from mindspore/ccsrc/kernel/gpu/quant/minmax_update_perlayer_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perlayer_gpu_kernel.h index a237b6dc266..80ce6185c03 100644 --- a/mindspore/ccsrc/kernel/gpu/quant/minmax_update_perlayer_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/quant/minmax_update_perlayer_gpu_kernel.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_MINMAX_UPDATE_PERLAYER_GPUKERNEL_H_ #include -#include "kernel/gpu/gpu_kernel.h" -#include "kernel/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/hccl/hccl_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.cc similarity index 89% rename from mindspore/ccsrc/kernel/hccl/hccl_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.cc index 87fb8d743d5..5ec4f52574c 100644 --- a/mindspore/ccsrc/kernel/hccl/hccl_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.cc @@ -14,10 +14,11 @@ * limitations under the License. 
*/ -#include "kernel/hccl/hccl_kernel.h" -#include "device/ascend/tasksink/runtime_utils.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/hccl/hccl_kernel.h" +#include "runtime/device/ascend/tasksink/runtime_utils.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" +#include "utils/context/ms_context.h" using HcclTaskInfoPtr = std::shared_ptr; using ge::model_runner::HcclTaskInfo; @@ -146,10 +147,12 @@ std::vector HcclKernel::GenTask(const std::vector &inpu << ", root_id=" << root_id_ << ", op_type=" << static_cast(op_type_) << ", data_type=" << static_cast(data_type); + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); HcclTaskInfoPtr task_info_ptr = std::make_shared( - stream_id, hccl_type, input_data_addr, output_data_addr, workspace_address, workspace_num, 0, private_def, nullptr, - hccl_count_, root_id_, op_type_, data_type, group_, RuntimeUtils::HcomBindModel, RuntimeUtils::HcomUnbindModel, - RuntimeUtils::HcomDistribute); + kernel_name_, stream_id, hccl_type, input_data_addr, output_data_addr, workspace_address, workspace_num, 0, + private_def, nullptr, hccl_count_, root_id_, op_type_, data_type, group_, RuntimeUtils::HcomBindModel, + RuntimeUtils::HcomUnbindModel, RuntimeUtils::HcomDistribute, NeedDump()); MS_EXCEPTION_IF_NULL(task_info_ptr); return {task_info_ptr}; } diff --git a/mindspore/ccsrc/kernel/hccl/hccl_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.h similarity index 96% rename from mindspore/ccsrc/kernel/hccl/hccl_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.h index 72e202591fc..db7a0fbf7ca 100644 --- a/mindspore/ccsrc/kernel/hccl/hccl_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.h @@ -23,8 +23,8 @@ #include #include #include -#include "kernel/ascend_kernel_mod.h" -#include "kernel/hccl/hcom_util.h" +#include "backend/kernel_compiler/ascend_kernel_mod.h" +#include 
"backend/kernel_compiler/hccl/hcom_util.h" #include "hccl/hcom.h" #include "common/utils.h" diff --git a/mindspore/ccsrc/kernel/hccl/hccl_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_build.cc similarity index 88% rename from mindspore/ccsrc/kernel/hccl/hccl_kernel_build.cc rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_build.cc index d6e4aa09b9d..8297be0b6d7 100644 --- a/mindspore/ccsrc/kernel/hccl/hccl_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_build.cc @@ -14,14 +14,14 @@ * limitations under the License. */ -#include "kernel/hccl/hccl_kernel_build.h" +#include "backend/kernel_compiler/hccl/hccl_kernel_build.h" #include #include #include -#include "kernel/hccl/hccl_kernel.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/hccl/hccl_kernel.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/hccl/hccl_kernel_build.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_build.h similarity index 95% rename from mindspore/ccsrc/kernel/hccl/hccl_kernel_build.h rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_build.h index f20760a3ebb..21b34d65223 100644 --- a/mindspore/ccsrc/kernel/hccl/hccl_kernel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_build.h @@ -19,7 +19,7 @@ #include #include -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/hccl/hccl_kernel_metadata.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.cc similarity index 68% rename from mindspore/ccsrc/kernel/hccl/hccl_kernel_metadata.cc rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.cc index 601d5cf1ea1..55742d383c1 100755 --- a/mindspore/ccsrc/kernel/hccl/hccl_kernel_metadata.cc +++ 
b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.cc @@ -14,14 +14,32 @@ * limitations under the License. */ -#include "kernel/hccl/hccl_kernel_metadata.h" +#include "backend/kernel_compiler/hccl/hccl_kernel_metadata.h" #include +#include #include "utils/utils.h" -#include "kernel/hccl/hcom_util.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/hccl/hcom_util.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace kernel { +namespace { +std::string GetKernelFormat(const CNodePtr &kernel_node, size_t index) { + const std::set kReduceNoSupportedSet = {kOpFormat_FRAC_Z, kOpFormat_FRACTAL_Z_C04, kOpFormat_C1HWNCoC0}; + auto op_name = AnfAlgo::GetCNodeName(kernel_node); + auto format = AnfAlgo::GetPrevNodeOutputFormat(kernel_node, index); + if (op_name != kReduceScatter && op_name != kAllGatherOpName) { + return format; + } + if (format == kOpFormat_FRAC_NZ && AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, index).size() <= 2) { + return kOpFormat_DEFAULT; + } + if (kReduceNoSupportedSet.find(format) != kReduceNoSupportedSet.end()) { + return kOpFormat_DEFAULT; + } + return format; +} +} // namespace void HcclMetadataInfo(const CNodePtr &kernel_node, std::vector> *kernel_info_list) { const std::vector kHcclSupportTypes = {kNumberTypeInt8, kNumberTypeInt32, kNumberTypeFloat16, kNumberTypeFloat32, kNumberTypeInt16}; @@ -36,13 +54,13 @@ void HcclMetadataInfo(const CNodePtr &kernel_node, std::vector inputs_format{}; std::vector inputs_type{}; for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) { - inputs_format.emplace_back(AnfAlgo::GetPrevNodeOutputFormat(kernel_node, input_index)); + inputs_format.emplace_back(GetKernelFormat(kernel_node, input_index)); inputs_type.push_back(type); } std::vector outputs_format; std::vector outputs_type; for (size_t output_index = 0; output_index < AnfAlgo::GetOutputTensorNum(kernel_node); ++output_index) { - 
outputs_format.emplace_back(AnfAlgo::GetPrevNodeOutputFormat(kernel_node, output_index)); + outputs_format.emplace_back(GetKernelFormat(kernel_node, output_index)); outputs_type.push_back(type); } auto builder = KernelBuildInfo::KernelBuildInfoBuilder(); diff --git a/mindspore/ccsrc/kernel/hccl/hccl_kernel_metadata.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.h similarity index 95% rename from mindspore/ccsrc/kernel/hccl/hccl_kernel_metadata.h rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.h index b13393d3bd7..25891fdaf6f 100755 --- a/mindspore/ccsrc/kernel/hccl/hccl_kernel_metadata.h +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.h @@ -18,7 +18,7 @@ #include #include #include -#include "kernel/kernel_build_info.h" +#include "backend/kernel_compiler/kernel_build_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/hccl/hcom_all_broadcast.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_broadcast.cc similarity index 96% rename from mindspore/ccsrc/kernel/hccl/hcom_all_broadcast.cc rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_broadcast.cc index 9dbe708ef91..e9fb4c9314b 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_all_broadcast.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_broadcast.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/hccl/hcom_all_broadcast.h" +#include "backend/kernel_compiler/hccl/hcom_all_broadcast.h" #include #include diff --git a/mindspore/ccsrc/kernel/hccl/hcom_all_broadcast.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_broadcast.h similarity index 96% rename from mindspore/ccsrc/kernel/hccl/hcom_all_broadcast.h rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_broadcast.h index ca8eba91af3..6434b5fb9c5 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_all_broadcast.h +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_broadcast.h @@ -20,7 +20,7 @@ #include #include #include "hccl/hcom.h" -#include "kernel/hccl/hccl_kernel.h" +#include "backend/kernel_compiler/hccl/hccl_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/hccl/hcom_all_gather.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.cc similarity index 96% rename from mindspore/ccsrc/kernel/hccl/hcom_all_gather.cc rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.cc index 6494f7fd12f..201071dcb5f 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_all_gather.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/hccl/hcom_all_gather.h" +#include "backend/kernel_compiler/hccl/hcom_all_gather.h" #include #include diff --git a/mindspore/ccsrc/kernel/hccl/hcom_all_gather.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.h similarity index 95% rename from mindspore/ccsrc/kernel/hccl/hcom_all_gather.h rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.h index 5de2c513cff..21d8ffa4843 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_all_gather.h +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.h @@ -20,7 +20,7 @@ #include #include #include "hccl/hcom.h" -#include "kernel/hccl/hccl_kernel.h" +#include "backend/kernel_compiler/hccl/hccl_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/hccl/hcom_all_reduce.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.cc similarity index 96% rename from mindspore/ccsrc/kernel/hccl/hcom_all_reduce.cc rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.cc index 35a058e766a..533ce1b0874 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_all_reduce.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/hccl/hcom_all_reduce.h" +#include "backend/kernel_compiler/hccl/hcom_all_reduce.h" #include #include diff --git a/mindspore/ccsrc/kernel/hccl/hcom_all_reduce.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.h similarity index 95% rename from mindspore/ccsrc/kernel/hccl/hcom_all_reduce.h rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.h index 939abd9de7f..39641f74489 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_all_reduce.h +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.h @@ -19,7 +19,7 @@ #include #include -#include "kernel/hccl/hccl_kernel.h" +#include "backend/kernel_compiler/hccl/hccl_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/hccl/hcom_all_reduce_scatter.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.cc similarity index 96% rename from mindspore/ccsrc/kernel/hccl/hcom_all_reduce_scatter.cc rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.cc index dea516885d7..32c6dacb015 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_all_reduce_scatter.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/hccl/hcom_all_reduce_scatter.h" +#include "backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h" #include #include diff --git a/mindspore/ccsrc/kernel/hccl/hcom_all_reduce_scatter.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h similarity index 96% rename from mindspore/ccsrc/kernel/hccl/hcom_all_reduce_scatter.h rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h index c734b517c67..2f4ace5aea6 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_all_reduce_scatter.h +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h @@ -20,7 +20,7 @@ #include #include #include "hccl/hcom.h" -#include "kernel/hccl/hccl_kernel.h" +#include "backend/kernel_compiler/hccl/hccl_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/hccl/hcom_util.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.cc similarity index 97% rename from mindspore/ccsrc/kernel/hccl/hcom_util.cc rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.cc index 088dbe59d5b..721c1b6ba04 100644 --- a/mindspore/ccsrc/kernel/hccl/hcom_util.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.cc @@ -14,12 +14,12 @@ * limitations under the License. 
*/ -#include "kernel/hccl/hcom_util.h" +#include "backend/kernel_compiler/hccl/hcom_util.h" #include -#include "kernel/common_utils.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/common_utils.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/hccl/hcom_util.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.h similarity index 100% rename from mindspore/ccsrc/kernel/hccl/hcom_util.h rename to mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.h diff --git a/mindspore/ccsrc/kernel/kash/kernel_pack.cc b/mindspore/ccsrc/backend/kernel_compiler/kash/kernel_pack.cc similarity index 98% rename from mindspore/ccsrc/kernel/kash/kernel_pack.cc rename to mindspore/ccsrc/backend/kernel_compiler/kash/kernel_pack.cc index a87441031bc..9933826f2bc 100644 --- a/mindspore/ccsrc/kernel/kash/kernel_pack.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/kash/kernel_pack.cc @@ -15,12 +15,11 @@ */ #include -#include "mindspore/ccsrc/kernel/kernel.h" -#include "kernel/kernel.h" -#include "kernel/akg/akg_kernel_build.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/akg/akg_kernel_build.h" #include "nlohmann/json.hpp" #include "securec/include/securec.h" -#include "pipeline/parse/python_adapter.h" +#include "pipeline/jit/parse/python_adapter.h" #include "utils/log_adapter.h" #include "utils/convert_utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/kernel.h b/mindspore/ccsrc/backend/kernel_compiler/kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/kernel.h index 7bccce49c3a..2d240338f3c 100644 --- a/mindspore/ccsrc/kernel/kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/kernel.h @@ -23,7 +23,7 @@ #include "ir/dtype.h" #include "utils/utils.h" #include "ir/tensor.h" -#include "pipeline/static_analysis/dshape.h" +#include 
"abstract/dshape.h" #include "utils/log_adapter.h" namespace mindspore { @@ -129,6 +129,10 @@ class KernelMod { virtual std::vector GenParameters() { return {}; } virtual ~KernelMod() = default; + void set_kernel_name(const std::string &kernel_name) { kernel_name_ = kernel_name; } + + protected: + std::string kernel_name_; }; using KernelModPtr = std::shared_ptr; } // namespace kernel diff --git a/mindspore/ccsrc/kernel/kernel_build_info.cc b/mindspore/ccsrc/backend/kernel_compiler/kernel_build_info.cc similarity index 97% rename from mindspore/ccsrc/kernel/kernel_build_info.cc rename to mindspore/ccsrc/backend/kernel_compiler/kernel_build_info.cc index c912a0c199b..68392d18716 100644 --- a/mindspore/ccsrc/kernel/kernel_build_info.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_build_info.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/kernel_build_info.h" +#include "backend/kernel_compiler/kernel_build_info.h" #include #include "utils/log_adapter.h" #include "debug/anf_ir_dump.h" @@ -119,6 +119,8 @@ bool KernelBuildInfo::IsInputDefaultPadding() const { return input_reshape_type_ bool KernelBuildInfo::IsOutputDefaultPadding() const { return output_reshape_type_.empty(); } +bool KernelBuildInfo::operator!=(const KernelBuildInfo &other) const { return !((*this) == other); } + void KernelBuildInfo::KernelBuildInfoBuilder::SetKernelType(const KernelType &kernel_type) { MS_EXCEPTION_IF_NULL(kernel_build_info_); kernel_build_info_->kernel_type_ = kernel_type; diff --git a/mindspore/ccsrc/kernel/kernel_build_info.h b/mindspore/ccsrc/backend/kernel_compiler/kernel_build_info.h similarity index 97% rename from mindspore/ccsrc/kernel/kernel_build_info.h rename to mindspore/ccsrc/backend/kernel_compiler/kernel_build_info.h index ca1083fd688..be243c9ae0c 100644 --- a/mindspore/ccsrc/kernel/kernel_build_info.h +++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_build_info.h @@ -22,7 +22,7 @@ #include #include #include "ir/dtype.h" -#include 
"kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { namespace kernel { @@ -85,6 +85,8 @@ class KernelBuildInfo { bool operator==(const KernelBuildInfo &other) const; + bool operator!=(const KernelBuildInfo &other) const; + public: static auto constexpr kInvalidFormat = "InvalidFormat"; diff --git a/mindspore/ccsrc/kernel/kernel_fusion.cc b/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc similarity index 94% rename from mindspore/ccsrc/kernel/kernel_fusion.cc rename to mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc index be79eca15a8..0045e49bef9 100644 --- a/mindspore/ccsrc/kernel/kernel_fusion.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include #include @@ -22,10 +22,10 @@ #include #include "common/utils.h" -#include "kernel/tbe/tbe_kernel_build.h" -#include "kernel/tbe/tbe_kernel_parallel_build.h" -#include "kernel/tbe/tbe_utils.h" -#include "kernel/tbe/tbe_convert_utils.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_build.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h" +#include "backend/kernel_compiler/tbe/tbe_utils.h" +#include "backend/kernel_compiler/tbe/tbe_convert_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/kernel_fusion.h b/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h similarity index 96% rename from mindspore/ccsrc/kernel/kernel_fusion.h rename to mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h index 8ded21787c3..2fb3a05b4b2 100644 --- a/mindspore/ccsrc/kernel/kernel_fusion.h +++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h @@ -18,7 +18,7 @@ #define MINDSPORE_CCSRC_KERNEL_KERNELFUSION_H_ #include #include -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { namespace kernel { /* diff --git 
a/mindspore/ccsrc/kernel/kernel_query.cc b/mindspore/ccsrc/backend/kernel_compiler/kernel_query.cc similarity index 93% rename from mindspore/ccsrc/kernel/kernel_query.cc rename to mindspore/ccsrc/backend/kernel_compiler/kernel_query.cc index 4a8ae81afa4..81b5d0f996b 100755 --- a/mindspore/ccsrc/kernel/kernel_query.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_query.cc @@ -14,15 +14,15 @@ * limitations under the License. */ -#include "kernel/kernel_query.h" +#include "backend/kernel_compiler/kernel_query.h" #include #include -#include "kernel/aicpu/aicpu_kernel_metadata.h" -#include "kernel/rts/rt_kernel_info.h" -#include "kernel/hccl/hccl_kernel_metadata.h" -#include "kernel/tbe/tbe_kernel_select/tbe_kernel_select.h" -#include "kernel/akg/akg_kernel_metadata.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h" +#include "backend/kernel_compiler/rts/rt_kernel_info.h" +#include "backend/kernel_compiler/hccl/hccl_kernel_metadata.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.h" +#include "backend/kernel_compiler/akg/akg_kernel_metadata.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/context/ms_context.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/kernel_query.h b/mindspore/ccsrc/backend/kernel_compiler/kernel_query.h similarity index 93% rename from mindspore/ccsrc/kernel/kernel_query.h rename to mindspore/ccsrc/backend/kernel_compiler/kernel_query.h index 257b0cf0735..20458f48d0a 100644 --- a/mindspore/ccsrc/kernel/kernel_query.h +++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_query.h @@ -20,8 +20,8 @@ #include #include #include -#include "kernel/kernel.h" -#include "kernel/kernel_build_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/kernel_build_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/oplib/opinfo.h 
b/mindspore/ccsrc/backend/kernel_compiler/oplib/opinfo.h similarity index 91% rename from mindspore/ccsrc/kernel/oplib/opinfo.h rename to mindspore/ccsrc/backend/kernel_compiler/oplib/opinfo.h index f224a97efc9..64ae1009d15 100644 --- a/mindspore/ccsrc/kernel/oplib/opinfo.h +++ b/mindspore/ccsrc/backend/kernel_compiler/oplib/opinfo.h @@ -21,7 +21,7 @@ #include #include #include "ir/dtype.h" -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { namespace kernel { @@ -103,13 +103,14 @@ class OpInfo { partial_flag_ = opinfo.partial_flag_; dynamic_format_ = opinfo.dynamic_format_; op_pattern_ = opinfo.op_pattern(); - for (auto attr : opinfo.attrs_ptr()) { + processor_ = opinfo.processor_; + for (const auto &attr : opinfo.attrs_ptr()) { attrs_ptr_.push_back(std::make_shared(*attr)); } - for (auto input : opinfo.inputs_ptr()) { + for (const auto &input : opinfo.inputs_ptr()) { inputs_ptr_.push_back(std::make_shared(*input)); } - for (auto output : opinfo.outputs_ptr()) { + for (const auto &output : opinfo.outputs_ptr()) { outputs_ptr_.push_back(std::make_shared(*output)); } ref_infos_ = opinfo.ref_infos(); @@ -121,6 +122,7 @@ class OpInfo { std::string fusion_type() const { return fusion_type_; } std::string kernel_name() const { return kernel_name_; } OpPattern op_pattern() const { return op_pattern_; } + std::string processor() const { return processor_; } std::vector> attrs_ptr() const { return attrs_ptr_; } std::vector> inputs_ptr() const { return inputs_ptr_; } std::vector> outputs_ptr() const { return outputs_ptr_; } @@ -136,6 +138,7 @@ class OpInfo { void set_kernel_name(const std::string &kernel_name) { kernel_name_ = kernel_name; } void set_partial_flag(const bool partial_flag) { partial_flag_ = partial_flag; } void set_op_pattern(const OpPattern op_pattern) { op_pattern_ = op_pattern; } + void set_processor(const std::string &processor) { processor_ = processor; } void add_attrs_ptr(const std::shared_ptr &attr) { 
attrs_ptr_.push_back(attr); } void add_inputs_ptr(const std::shared_ptr &input) { inputs_ptr_.push_back(input); } void add_outputs_ptr(const std::shared_ptr &output) { outputs_ptr_.push_back(output); } @@ -144,6 +147,10 @@ class OpInfo { void add_ref_pair(size_t out_index, size_t in_index) { (void)ref_infos_.emplace(out_index, in_index); } void ClearInputs() { (void)inputs_ptr_.clear(); } void ClearOutputs() { (void)outputs_ptr_.clear(); } + bool equals_to(const std::shared_ptr &other_info) const { + return this->op_name_ == other_info->op_name_ && this->imply_type_ == other_info->imply_type_ && + this->processor_ == other_info->processor_; + } private: std::string op_name_; @@ -157,6 +164,7 @@ class OpInfo { bool partial_flag_ = false; bool dynamic_format_ = false; OpPattern op_pattern_ = kCommonPattern; + std::string processor_; std::vector> attrs_ptr_; std::vector> inputs_ptr_; std::vector> outputs_ptr_; diff --git a/mindspore/ccsrc/kernel/oplib/oplib.cc b/mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.cc similarity index 80% rename from mindspore/ccsrc/kernel/oplib/oplib.cc rename to mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.cc index e01bbe9162c..69c4ca7db1e 100644 --- a/mindspore/ccsrc/kernel/oplib/oplib.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.cc @@ -14,11 +14,12 @@ * limitations under the License. 
*/ -#include "kernel/oplib/oplib.h" +#include "backend/kernel_compiler/oplib/oplib.h" #include #include #include #include +#include #include "utils/log_adapter.h" #include "utils/overload.h" #include "utils/context/ms_context.h" @@ -44,9 +45,10 @@ constexpr auto kAttr = "attr"; constexpr auto kIputs = "inputs"; constexpr auto kOutputs = "outputs"; constexpr auto kAiCPU = "AiCPU"; +constexpr auto kAiCore = "AiCore"; +constexpr auto kCUDA = "CUDA"; constexpr auto kTbe = "TBE"; -constexpr auto kAkg = "akg"; -constexpr auto kAutodiff = "AutoDiff"; +constexpr auto kAkg = "AKG"; constexpr auto kName = "name"; constexpr auto kParamType = "param_type"; constexpr auto kDtype = "dtype"; @@ -57,9 +59,10 @@ constexpr auto kIndex = "index"; constexpr auto kFormat = "format"; constexpr auto kNeedCompile = "need_compile"; constexpr auto kShape = "shape"; +constexpr auto kProcessor = "processor"; std::vector> OpLib::op_info_; -std::string ImplTypeToStr(OpImplyType impl_type) { +static std::string ImplTypeToStr(OpImplyType impl_type) { switch (impl_type) { case kTBE: return kTbe; @@ -80,7 +83,7 @@ bool OpLib::RegOp(const std::string &json_string, const std::string &impl_path) if (imply_type_string == kTbe) { OpImplyType imply_type = kTBE; ret = DecodeOpInfo(op_json, imply_type, impl_path); - } else if (imply_type_string == kAutodiff) { + } else if (imply_type_string == kAkg) { OpImplyType imply_type = kAKG; ret = DecodeOpInfo(op_json, imply_type, impl_path); } else if (imply_type_string == kAiCPU) { @@ -124,6 +127,55 @@ void OpLib::DecodeTBESpecificInfo(const nlohmann::json &obj, const std::shared_p } } +void OpLib::DecodeAKGSpecificInfo(const nlohmann::json &obj, const std::shared_ptr &op_info) { + MS_EXCEPTION_IF_NULL(op_info); + op_info->set_processor(obj.at(kProcessor)); +} + +bool OpLib::RegOpFromLocalInfo() { + MS_LOG(INFO) << "Start"; + static bool has_load = false; + if (has_load) { + return true; + } + has_load = true; + std::string dir = 
common::GetEnv("MINDSPORE_OP_INFO_PATH"); + if (dir.empty()) { + MS_LOG(INFO) << "MindSpore op info path does not been setted. use op info from python pass."; + return true; + } + char real_path[PATH_MAX] = {0}; + if (dir.size() >= PATH_MAX) { + MS_LOG(ERROR) << "Op info path is invalid: " << dir; + return false; + } +#if defined(_WIN32) || defined(_WIN64) + if (_fullpath(real_path, common::SafeCStr(dir), PATH_MAX) == nullptr) { + MS_LOG(ERROR) << "Op info path is invalid: " << dir; + return false; + } +#else + if (realpath(common::SafeCStr(dir), real_path) == nullptr) { + MS_LOG(ERROR) << "Op info path is invalid: " << dir; + return false; + } +#endif + MS_LOG(INFO) << "Start to read op info from local file."; + std::ifstream file(real_path); + if (!file.is_open()) { + MS_LOG(ERROR) << "Find op info file failed."; + return false; + } + std::string line; + while (getline(file, line)) { + if (!line.empty()) { + (void)OpLib::RegOp(line, ""); + } + } + MS_LOG(INFO) << "End"; + return true; +} + bool OpLib::DecodeOpInfo(const nlohmann::json &obj, const mindspore::kernel::OpImplyType imply_type, const std::string &impl_path) { std::shared_ptr op_info = std::make_shared(); @@ -134,6 +186,8 @@ bool OpLib::DecodeOpInfo(const nlohmann::json &obj, const mindspore::kernel::OpI op_info->set_fusion_type(obj.at(kFusionType)); if (imply_type == kTBE) { DecodeTBESpecificInfo(obj, op_info); + } else if (imply_type == kAKG) { + DecodeAKGSpecificInfo(obj, op_info); } auto attrs = obj.at(kAttr); for (const auto &attr : attrs) { @@ -160,14 +214,16 @@ bool OpLib::DecodeOpInfo(const nlohmann::json &obj, const mindspore::kernel::OpI return false; } } + if (CheckRepetition(op_info)) { + MS_LOG(WARNING) << "This op info has been already registed. 
op name: " << op_info->op_name() + << ", impl type: " << ImplTypeToStr(op_info->imply_type()) + << ", impl path: " << op_info->impl_path(); + return true; + } if (!GetRefInfo(op_info)) { MS_LOG(ERROR) << "GetRefInfo Failed"; return false; } - if (!CheckRepetition(op_info)) { - MS_LOG(ERROR) << "CheckRepetition Failed"; - return false; - } op_info_.push_back(op_info); return true; } @@ -269,6 +325,9 @@ bool OpLib::DecodeInputOutput(const nlohmann::json &obj, const OpImplyType imply } std::shared_ptr OpLib::FindOp(const std::string &op_name, OpImplyType imply_type) { + if (!OpLib::RegOpFromLocalInfo()) { + MS_LOG(INFO) << "Warning reg local op info failed."; + } auto context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context); bool is_gpu = (context->device_target() == kGPUDevice); @@ -280,11 +339,16 @@ std::shared_ptr OpLib::FindOp(const std::string &op_name, OpImplyType im for (const auto &op_info : op_info_) { MS_EXCEPTION_IF_NULL(op_info); if (op_info->op_name() == op_name && op_info->imply_type() == imply_type) { - return op_info; + auto akg_processor_match = [&]() { + return is_gpu ? 
op_info->processor() == kCUDA : op_info->processor() == kAiCore; + }; + if (imply_type != kAKG || akg_processor_match()) { + return op_info; + } } } - MS_LOG(DEBUG) << "FindOp failed: opname: " << op_name << ", imply_type: " << ImplTypeToStr(imply_type) - << ", current op num: " << op_info_.size(); + MS_LOG(INFO) << "FindOp failed: opname: " << op_name << ", imply_type: " << ImplTypeToStr(imply_type) + << ", current op num: " << op_info_.size(); return nullptr; } @@ -316,14 +380,11 @@ bool OpLib::CheckRepetition(const std::shared_ptr &op_info) { MS_EXCEPTION_IF_NULL(op_info); for (const auto &exist_op_info : op_info_) { MS_EXCEPTION_IF_NULL(exist_op_info); - if (exist_op_info->op_name() == op_info->op_name() && exist_op_info->imply_type() == op_info->imply_type() && - exist_op_info->impl_path() != op_info->impl_path()) { - MS_LOG(ERROR) << "Op has already exist, please use other name, op name: " << op_info->op_name() - << " op type: " << ImplTypeToStr(op_info->imply_type()); - return false; + if (exist_op_info->equals_to(op_info)) { + return true; } } - return true; + return false; } } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/oplib/oplib.h b/mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.h similarity index 85% rename from mindspore/ccsrc/kernel/oplib/oplib.h rename to mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.h index 47183455a24..845edbfc2ac 100644 --- a/mindspore/ccsrc/kernel/oplib/oplib.h +++ b/mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.h @@ -20,7 +20,7 @@ #include #include #include -#include "kernel/oplib/opinfo.h" +#include "backend/kernel_compiler/oplib/opinfo.h" namespace mindspore { namespace kernel { @@ -28,11 +28,8 @@ class OpLib { public: OpLib() = default; virtual ~OpLib() = default; - bool RegOp(const std::string &json_string, const std::string &impl_path); - static void RegOpInfo(std::shared_ptr opinfo) { - op_info_.emplace_back(opinfo); - return; - } + static bool RegOp(const std::string 
&json_string, const std::string &impl_path); + static void RegOpInfo(const std::shared_ptr &opinfo) { op_info_.emplace_back(opinfo); } static std::shared_ptr FindOp(const std::string &op_name, OpImplyType imply_type); static const std::vector> &GetAllOpsInfo() { return op_info_; } @@ -40,12 +37,14 @@ class OpLib { static std::vector> op_info_; private: + static bool RegOpFromLocalInfo(); static bool DecodeOpInfo(const nlohmann::json &obj, const OpImplyType imply_type, const std::string &impl_path); static bool DecodeAttr(const nlohmann::json &obj, const OpImplyType imply_type, const std::shared_ptr &op_info); static bool DecodeDtypeFormat(const nlohmann::json &dtype_format, const std::shared_ptr &op_io, size_t index); static void DecodeTBESpecificInfo(const nlohmann::json &obj, const std::shared_ptr &op_info); + static void DecodeAKGSpecificInfo(const nlohmann::json &obj, const std::shared_ptr &op_info); static bool DecodeInputOutput(const nlohmann::json &obj, const OpImplyType imply_type, const OpIOType io_type, const std::shared_ptr &op_info, const nlohmann::json &dtype_format); static bool GetRefInfo(const std::shared_ptr &op_info); diff --git a/mindspore/ccsrc/kernel/oplib/oploader.h b/mindspore/ccsrc/backend/kernel_compiler/oplib/oploader.h similarity index 96% rename from mindspore/ccsrc/kernel/oplib/oploader.h rename to mindspore/ccsrc/backend/kernel_compiler/oplib/oploader.h index dd4c37e80b8..6b2981e5b35 100644 --- a/mindspore/ccsrc/kernel/oplib/oploader.h +++ b/mindspore/ccsrc/backend/kernel_compiler/oplib/oploader.h @@ -18,7 +18,7 @@ #define MINDSPORE_OPLOADER_H #include -#include "kernel/oplib/oplib.h" +#include "backend/kernel_compiler/oplib/oplib.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/assign.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/assign.cc similarity index 87% rename from mindspore/ccsrc/kernel/rts/assign.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/assign.cc index 
7f214b6e6f6..552468bb714 100644 --- a/mindspore/ccsrc/kernel/rts/assign.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/assign.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/rts/assign.h" +#include "backend/kernel_compiler/rts/assign.h" #include @@ -58,8 +58,9 @@ std::vector AssignKernel::GenTask(const std::vector &in } stream_id_ = stream_id; - std::shared_ptr task_info_ptr = std::make_shared( - stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr, inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE); + std::shared_ptr task_info_ptr = + std::make_shared(kernel_name_, stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr, + inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE, false); MS_EXCEPTION_IF_NULL(task_info_ptr); return {task_info_ptr}; } diff --git a/mindspore/ccsrc/kernel/rts/assign.h b/mindspore/ccsrc/backend/kernel_compiler/rts/assign.h similarity index 92% rename from mindspore/ccsrc/kernel/rts/assign.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/assign.h index 0e7e52d48fe..cff946cc366 100644 --- a/mindspore/ccsrc/kernel/rts/assign.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/assign.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_RTS_ASSIGN_H #include -#include "kernel/rts/rt_kernel.h" -#include "kernel/rts/rt_kernel_info.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" +#include "backend/kernel_compiler/rts/rt_kernel_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/label_goto.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/label_goto.cc similarity index 91% rename from mindspore/ccsrc/kernel/rts/label_goto.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/label_goto.cc index 7bcf42a210d..8ec460fe0ba 100644 --- a/mindspore/ccsrc/kernel/rts/label_goto.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/label_goto.cc @@ -14,12 +14,12 @@ * limitations under the License. 
*/ -#include "kernel/rts/label_goto.h" +#include "backend/kernel_compiler/rts/label_goto.h" #include #include #include "runtime/stream.h" #include "framework/ge_runtime/task_info.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" using ge::model_runner::LabelGotoTaskInfo; @@ -55,7 +55,8 @@ std::vector LabelGotoKernel::GenTask(const std::vector const std::vector &, uint32_t stream_id) { MS_LOG(INFO) << "LabelGotoKernel GenTask label:" << label_ << ", stream id:" << stream_id; std::vector task_info_list; - std::shared_ptr task_info_ptr = std::make_shared(stream_id, label_); + std::shared_ptr task_info_ptr = + std::make_shared(kernel_name_, stream_id, label_); MS_EXCEPTION_IF_NULL(task_info_ptr); task_info_list.emplace_back(task_info_ptr); return task_info_list; diff --git a/mindspore/ccsrc/kernel/rts/label_goto.h b/mindspore/ccsrc/backend/kernel_compiler/rts/label_goto.h similarity index 93% rename from mindspore/ccsrc/kernel/rts/label_goto.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/label_goto.h index efccc12d6f5..2680d916a5a 100644 --- a/mindspore/ccsrc/kernel/rts/label_goto.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/label_goto.h @@ -19,8 +19,8 @@ #include #include -#include "kernel/rts/rt_kernel.h" -#include "kernel/rts/rt_kernel_info.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" +#include "backend/kernel_compiler/rts/rt_kernel_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/label_set.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/label_set.cc similarity index 93% rename from mindspore/ccsrc/kernel/rts/label_set.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/label_set.cc index 5aedd012dc6..909885ff178 100644 --- a/mindspore/ccsrc/kernel/rts/label_set.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/label_set.cc @@ -14,12 +14,12 @@ * limitations under the License. 
*/ -#include "kernel/rts/label_set.h" +#include "backend/kernel_compiler/rts/label_set.h" #include #include #include "runtime/stream.h" #include "framework/ge_runtime/task_info.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" using ge::model_runner::LabelSetTaskInfo; @@ -55,7 +55,7 @@ std::vector LabelSetKernel::GenTask(const std::vector & const std::vector &, uint32_t stream_id) { MS_LOG(INFO) << "LabelSetKernel GenTask label:" << label_ << ", stream id:" << stream_id; std::vector task_info_list; - std::shared_ptr task_info_ptr = std::make_shared(stream_id, label_); + std::shared_ptr task_info_ptr = std::make_shared(kernel_name_, stream_id, label_); MS_EXCEPTION_IF_NULL(task_info_ptr); task_info_list.emplace_back(task_info_ptr); return task_info_list; diff --git a/mindspore/ccsrc/kernel/rts/label_set.h b/mindspore/ccsrc/backend/kernel_compiler/rts/label_set.h similarity index 93% rename from mindspore/ccsrc/kernel/rts/label_set.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/label_set.h index d05d81f8985..8d0cfdfb20b 100644 --- a/mindspore/ccsrc/kernel/rts/label_set.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/label_set.h @@ -19,8 +19,8 @@ #include #include -#include "kernel/rts/rt_kernel.h" -#include "kernel/rts/rt_kernel_info.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" +#include "backend/kernel_compiler/rts/rt_kernel_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/label_switch.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/label_switch.cc similarity index 94% rename from mindspore/ccsrc/kernel/rts/label_switch.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/label_switch.cc index fb1ad1601a4..ccb49d94970 100644 --- a/mindspore/ccsrc/kernel/rts/label_switch.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/label_switch.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include "kernel/rts/label_switch.h" +#include "backend/kernel_compiler/rts/label_switch.h" #include #include #include #include "runtime/stream.h" #include "framework/ge_runtime/task_info.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" using ge::model_runner::LabelSwitchTaskInfo; @@ -67,7 +67,7 @@ std::vector LabelSwitchKernel::GenTask(const std::vector task_info_list; cond_ = inputs[0]->addr; - auto task_info_ptr = std::make_shared(stream_id, label_size_, label_list_, cond_); + auto task_info_ptr = std::make_shared(kernel_name_, stream_id, label_size_, label_list_, cond_); MS_EXCEPTION_IF_NULL(task_info_ptr); task_info_list.emplace_back(task_info_ptr); return task_info_list; diff --git a/mindspore/ccsrc/kernel/rts/label_switch.h b/mindspore/ccsrc/backend/kernel_compiler/rts/label_switch.h similarity index 94% rename from mindspore/ccsrc/kernel/rts/label_switch.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/label_switch.h index 858f851b2ab..1860d38d74a 100644 --- a/mindspore/ccsrc/kernel/rts/label_switch.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/label_switch.h @@ -19,8 +19,8 @@ #include #include -#include "kernel/rts/rt_kernel.h" -#include "kernel/rts/rt_kernel_info.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" +#include "backend/kernel_compiler/rts/rt_kernel_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/memcpy_async.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/memcpy_async.cc similarity index 93% rename from mindspore/ccsrc/kernel/rts/memcpy_async.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/memcpy_async.cc index f5fbec6e56d..ca1114a83f7 100644 --- a/mindspore/ccsrc/kernel/rts/memcpy_async.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/memcpy_async.cc @@ -14,15 +14,16 @@ * limitations under the License. 
*/ -#include "kernel/rts/memcpy_async.h" +#include "backend/kernel_compiler/rts/memcpy_async.h" #include #include #include "runtime/mem.h" #include "common/utils.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/trans.h" +#include "utils/context/ms_context.h" using ge::model_runner::MemcpyAsyncTaskInfo; using MemcpyAsyncTaskInfoPtr = std::shared_ptr; @@ -118,8 +119,9 @@ std::vector MemCpyAsyncKernel::GenTask(const std::vector task_info_ptr = std::make_shared( - stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr, inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE); + std::shared_ptr task_info_ptr = + std::make_shared(kernel_name_, stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr, + inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE, NeedDump()); MS_EXCEPTION_IF_NULL(task_info_ptr); return {task_info_ptr}; } diff --git a/mindspore/ccsrc/kernel/rts/memcpy_async.h b/mindspore/ccsrc/backend/kernel_compiler/rts/memcpy_async.h similarity index 94% rename from mindspore/ccsrc/kernel/rts/memcpy_async.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/memcpy_async.h index 94bbf1ca1cb..07a782be500 100644 --- a/mindspore/ccsrc/kernel/rts/memcpy_async.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/memcpy_async.h @@ -19,8 +19,8 @@ #include #include -#include "kernel/rts/rt_kernel.h" -#include "kernel/rts/rt_kernel_info.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" +#include "backend/kernel_compiler/rts/rt_kernel_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/profiling_kernel_mod.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/profiling_kernel_mod.cc similarity index 90% rename from mindspore/ccsrc/kernel/rts/profiling_kernel_mod.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/profiling_kernel_mod.cc index ff005f399bd..8213468b488 100644 --- a/mindspore/ccsrc/kernel/rts/profiling_kernel_mod.cc +++ 
b/mindspore/ccsrc/backend/kernel_compiler/rts/profiling_kernel_mod.cc @@ -14,15 +14,15 @@ * limitations under the License. */ -#include "kernel/rts/profiling_kernel_mod.h" +#include "backend/kernel_compiler/rts/profiling_kernel_mod.h" #include #include #include #include "framework/ge_runtime/task_info.h" -#include "device/ascend/profiling/profiling_utils.h" -#include "session/anf_runtime_algorithm.h" +#include "runtime/device/ascend/profiling/profiling_utils.h" +#include "backend/session/anf_runtime_algorithm.h" using ProfilerTraceTaskInfo = ge::model_runner::ProfilerTraceTaskInfo; using mindspore::device::ascend::ProfilingUtils; @@ -63,7 +63,7 @@ std::vector ProfilingKernelMod::GenTask(const std::vector task_info_ptr = - std::make_shared(stream_id, log_id_, notify_, flags_); + std::make_shared(kernel_name_, stream_id, log_id_, notify_, flags_); return {task_info_ptr}; } } // namespace kernel diff --git a/mindspore/ccsrc/kernel/rts/profiling_kernel_mod.h b/mindspore/ccsrc/backend/kernel_compiler/rts/profiling_kernel_mod.h similarity index 96% rename from mindspore/ccsrc/kernel/rts/profiling_kernel_mod.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/profiling_kernel_mod.h index f77f3b5c673..cdb43afb3ea 100644 --- a/mindspore/ccsrc/kernel/rts/profiling_kernel_mod.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/profiling_kernel_mod.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_RTS_PROFILING_KERNEL_MOD_H_ #define MINDSPORE_MINDSPORE_CCSRC_KERNEL_RTS_PROFILING_KERNEL_MOD_H_ #include -#include "kernel/rts/rt_kernel.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" namespace mindspore { namespace kernel { class ProfilingKernelMod : public RtKernel { diff --git a/mindspore/ccsrc/kernel/rts/recv.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/recv.cc similarity index 92% rename from mindspore/ccsrc/kernel/rts/recv.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/recv.cc index c195fd1c92e..cee0ef2fdc4 100644 --- 
a/mindspore/ccsrc/kernel/rts/recv.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/recv.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "kernel/rts/recv.h" +#include "backend/kernel_compiler/rts/recv.h" #include #include "runtime/stream.h" #include "utils/context/ms_context.h" -#include "device/ascend/ascend_stream_assign.h" +#include "runtime/device/ascend/ascend_stream_assign.h" #include "framework/ge_runtime/task_info.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" namespace mindspore { @@ -60,7 +60,7 @@ std::vector RecvKernel::GenTask(const std::vector &, co const std::vector &, uint32_t stream_id) { MS_LOG(INFO) << "RecvKernel GenTask event_id_:" << event_id_ << ", stream_id_:" << stream_id; stream_id_ = stream_id; - EventWaitTaskInfoPtr task_info_ptr = std::make_shared(stream_id, event_id_); + EventWaitTaskInfoPtr task_info_ptr = std::make_shared(kernel_name_, stream_id, event_id_); MS_EXCEPTION_IF_NULL(task_info_ptr); return {task_info_ptr}; } diff --git a/mindspore/ccsrc/kernel/rts/recv.h b/mindspore/ccsrc/backend/kernel_compiler/rts/recv.h similarity index 93% rename from mindspore/ccsrc/kernel/rts/recv.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/recv.h index 68f0b69cc5c..73e0214eaea 100644 --- a/mindspore/ccsrc/kernel/rts/recv.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/recv.h @@ -19,8 +19,8 @@ #include #include -#include "kernel/rts/rt_kernel.h" -#include "kernel/rts/rt_kernel_info.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" +#include "backend/kernel_compiler/rts/rt_kernel_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/rt_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel.cc similarity index 96% rename from mindspore/ccsrc/kernel/rts/rt_kernel.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel.cc index 9e813723830..9279a84cf0c 100644 --- 
a/mindspore/ccsrc/kernel/rts/rt_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/rts/rt_kernel.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/rt_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel.h similarity index 95% rename from mindspore/ccsrc/kernel/rts/rt_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel.h index 44d55dca316..dc0aa3e2832 100644 --- a/mindspore/ccsrc/kernel/rts/rt_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel.h @@ -22,8 +22,8 @@ #include #include #include -#include "kernel/ascend_kernel_mod.h" -#include "kernel/task_stream.h" +#include "backend/kernel_compiler/ascend_kernel_mod.h" +#include "backend/kernel_compiler/task_stream.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/rt_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_build.cc similarity index 88% rename from mindspore/ccsrc/kernel/rts/rt_kernel_build.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_build.cc index 164605fe9b8..9704a9b97fe 100644 --- a/mindspore/ccsrc/kernel/rts/rt_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_build.cc @@ -14,15 +14,15 @@ * limitations under the License. 
*/ -#include "kernel/rts/rt_kernel_build.h" +#include "backend/kernel_compiler/rts/rt_kernel_build.h" #include #include #include #include -#include "kernel/rts/rt_kernel.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/rt_kernel_build.h b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_build.h similarity index 95% rename from mindspore/ccsrc/kernel/rts/rt_kernel_build.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_build.h index cbd674b751a..ccfb8d923b5 100644 --- a/mindspore/ccsrc/kernel/rts/rt_kernel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_build.h @@ -19,7 +19,7 @@ #include #include -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { namespace kernel { KernelModPtr RtOpBuild(const AnfNodePtr &anf_node); diff --git a/mindspore/ccsrc/kernel/rts/rt_kernel_info.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_info.cc similarity index 97% rename from mindspore/ccsrc/kernel/rts/rt_kernel_info.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_info.cc index 14f5a60a070..9501aed5f2a 100755 --- a/mindspore/ccsrc/kernel/rts/rt_kernel_info.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_info.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include "kernel/rts/rt_kernel_info.h" +#include "backend/kernel_compiler/rts/rt_kernel_info.h" #include #include #include "utils/convert_utils.h" #include "utils/utils.h" #include "common/utils.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/rt_kernel_info.h b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_info.h similarity index 95% rename from mindspore/ccsrc/kernel/rts/rt_kernel_info.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_info.h index ae3753b4c87..6048fb37795 100644 --- a/mindspore/ccsrc/kernel/rts/rt_kernel_info.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_info.h @@ -26,8 +26,8 @@ #include #include "ir/dtype.h" -#include "kernel/kernel_build_info.h" -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/kernel_compiler/kernel.h" #include "utils/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/rts/send.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/send.cc similarity index 93% rename from mindspore/ccsrc/kernel/rts/send.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/send.cc index ccdd43ebb61..11c0a7d6682 100644 --- a/mindspore/ccsrc/kernel/rts/send.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/send.cc @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#include "kernel/rts/send.h" +#include "backend/kernel_compiler/rts/send.h" #include #include "runtime/event.h" #include "framework/ge_runtime/task_info.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" using ge::model_runner::EventRecordTaskInfo; @@ -57,7 +57,7 @@ std::vector SendKernel::GenTask(const std::vector &, co const std::vector &, uint32_t stream_id) { MS_LOG(INFO) << "SendKernel GenTask event id:" << event_id_ << ", stream id:" << stream_id; stream_id_ = stream_id; - EventRecordTaskInfoPtr task_info_ptr = std::make_shared(stream_id, event_id_); + EventRecordTaskInfoPtr task_info_ptr = std::make_shared(kernel_name_, stream_id, event_id_); MS_EXCEPTION_IF_NULL(task_info_ptr); return {task_info_ptr}; } diff --git a/mindspore/ccsrc/kernel/rts/send.h b/mindspore/ccsrc/backend/kernel_compiler/rts/send.h similarity index 93% rename from mindspore/ccsrc/kernel/rts/send.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/send.h index 5c5b7cf09e6..dbadb1ef44f 100644 --- a/mindspore/ccsrc/kernel/rts/send.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/send.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_RTS_SEND_H #include #include -#include "kernel/rts/rt_kernel.h" -#include "kernel/rts/rt_kernel_info.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" +#include "backend/kernel_compiler/rts/rt_kernel_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/stream_active.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/stream_active.cc similarity index 92% rename from mindspore/ccsrc/kernel/rts/stream_active.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/stream_active.cc index 4f0895a0be2..e33549973d8 100644 --- a/mindspore/ccsrc/kernel/rts/stream_active.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/stream_active.cc @@ -14,12 +14,12 @@ * limitations under the License. 
*/ -#include "kernel/rts/stream_active.h" +#include "backend/kernel_compiler/rts/stream_active.h" #include #include #include "runtime/stream.h" #include "framework/ge_runtime/task_info.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" using ge::model_runner::StreamActiveTaskInfo; @@ -72,7 +72,8 @@ std::vector StreamActiveKernel::GenTask(const std::vector task_info_list; for (auto &index : active_streams_index_) { - std::shared_ptr task_info_ptr = std::make_shared(stream_id, index); + std::shared_ptr task_info_ptr = + std::make_shared(kernel_name_, stream_id, index); MS_EXCEPTION_IF_NULL(task_info_ptr); task_info_list.emplace_back(task_info_ptr); MS_LOG(INFO) << "StreamActiveKernel GenTask: streamId:" << stream_id << ", Active streamId:" << index; diff --git a/mindspore/ccsrc/kernel/rts/stream_active.h b/mindspore/ccsrc/backend/kernel_compiler/rts/stream_active.h similarity index 93% rename from mindspore/ccsrc/kernel/rts/stream_active.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/stream_active.h index 68c422e7c29..409c3437dcf 100644 --- a/mindspore/ccsrc/kernel/rts/stream_active.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/stream_active.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_RTS_STREAM_ACTIVE_H #include #include -#include "kernel/rts/rt_kernel.h" -#include "kernel/rts/rt_kernel_info.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" +#include "backend/kernel_compiler/rts/rt_kernel_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/rts/stream_switch.cc b/mindspore/ccsrc/backend/kernel_compiler/rts/stream_switch.cc similarity index 92% rename from mindspore/ccsrc/kernel/rts/stream_switch.cc rename to mindspore/ccsrc/backend/kernel_compiler/rts/stream_switch.cc index bab6b043669..5fe03b19604 100644 --- a/mindspore/ccsrc/kernel/rts/stream_switch.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/stream_switch.cc @@ -14,14 
+14,14 @@ * limitations under the License. */ -#include "kernel/rts/stream_switch.h" +#include "backend/kernel_compiler/rts/stream_switch.h" #include #include #include "runtime/stream.h" #include "framework/ge_runtime/task_info.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" using ge::model_runner::StreamSwitchTaskInfo; @@ -91,8 +91,8 @@ std::vector StreamSwitchKernel::GenTask(const std::vectoraddr; MS_LOG(INFO) << "cond_:" << static_cast(cond_) << ", true_stream_index_:" << true_stream_index_ << ", stream_id:" << stream_id; - std::shared_ptr task_info_ptr = - std::make_shared(stream_id, true_stream_index_, loop_cnt, ites_per_loop, cond_, data_type_); + std::shared_ptr task_info_ptr = std::make_shared( + kernel_name_, stream_id, true_stream_index_, loop_cnt, ites_per_loop, cond_, data_type_); MS_EXCEPTION_IF_NULL(task_info_ptr); return {task_info_ptr}; } diff --git a/mindspore/ccsrc/kernel/rts/stream_switch.h b/mindspore/ccsrc/backend/kernel_compiler/rts/stream_switch.h similarity index 93% rename from mindspore/ccsrc/kernel/rts/stream_switch.h rename to mindspore/ccsrc/backend/kernel_compiler/rts/stream_switch.h index 4e927f30599..64a51f68bf5 100644 --- a/mindspore/ccsrc/kernel/rts/stream_switch.h +++ b/mindspore/ccsrc/backend/kernel_compiler/rts/stream_switch.h @@ -19,8 +19,8 @@ #include #include -#include "kernel/rts/rt_kernel.h" -#include "kernel/rts/rt_kernel_info.h" +#include "backend/kernel_compiler/rts/rt_kernel.h" +#include "backend/kernel_compiler/rts/rt_kernel_info.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/task_stream.h b/mindspore/ccsrc/backend/kernel_compiler/task_stream.h similarity index 100% rename from mindspore/ccsrc/kernel/task_stream.h rename to mindspore/ccsrc/backend/kernel_compiler/task_stream.h diff --git a/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.cc similarity index 98% 
rename from mindspore/ccsrc/kernel/tbe/tbe_adapter.cc rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.cc index c38f48763e6..449a9f45564 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/tbe/tbe_adapter.h" +#include "backend/kernel_compiler/tbe/tbe_adapter.h" #include #include @@ -23,8 +23,8 @@ #include #include -#include "session/anf_runtime_algorithm.h" -#include "kernel/oplib/opinfo.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/oplib/opinfo.h" namespace mindspore { namespace kernel { @@ -84,6 +84,7 @@ static std::map tbe_func_adapter_map = { {"transpose", "transpose_d"}, {"fill", "fill_d"}, {"unsorted_segment_sum", "unsorted_segment_sum_d"}, + {"unsorted_segment_prod", "unsorted_segment_prod_d"}, {"concat", "concat_d"}, {"slice", "slice_d"}, {"reduce_sum", "reduce_sum_d"}, diff --git a/mindspore/ccsrc/kernel/tbe/tbe_adapter.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.h similarity index 97% rename from mindspore/ccsrc/kernel/tbe/tbe_adapter.h rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.h index 51c4cfd777f..aa09efc11f8 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_adapter.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.h @@ -21,8 +21,8 @@ #include #include #include "nlohmann/json.hpp" -#include "ir/base.h" -#include "kernel/oplib/opinfo.h" +#include "base/base.h" +#include "backend/kernel_compiler/oplib/opinfo.h" // Note: This file is mainly used to adapt the ME front-end operator description and // the TBE back-end operator implementation difference namespace mindspore { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_convert_utils.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.cc similarity index 97% rename from mindspore/ccsrc/kernel/tbe/tbe_convert_utils.cc rename to 
mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.cc index 90c55572531..e7fd94ef84a 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_convert_utils.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "kernel/tbe/tbe_convert_utils.h" +#include "backend/kernel_compiler/tbe/tbe_convert_utils.h" #include #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_convert_utils.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.h similarity index 94% rename from mindspore/ccsrc/kernel/tbe/tbe_convert_utils.h rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.h index 2c8d3008b9a..dea058cd56c 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_convert_utils.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_KERNEL_TBE_COMMON_UTILS_H_ #include -#include "kernel/kernel.h" -#include "ir/base.h" +#include "backend/kernel_compiler/kernel.h" +#include "base/base.h" #include "ir/dtype/type.h" namespace mindspore { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc similarity index 99% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc index 645a195f5ec..73642b291a4 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc @@ -14,17 +14,17 @@ * limitations under the License. 
*/ -#include "kernel/tbe/tbe_kernel_build.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_build.h" #include #include #include -#include "operator/ops.h" -#include "parallel/ops_info/ops_utils.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/tbe/tbe_adapter.h" -#include "kernel/tbe/tbe_python_funcs.h" -#include "kernel/tbe/tbe_convert_utils.h" -#include "kernel/tbe/tbe_utils.h" +#include "frontend/operator/ops.h" +#include "frontend/parallel/ops_info/ops_utils.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/tbe/tbe_adapter.h" +#include "backend/kernel_compiler/tbe/tbe_python_funcs.h" +#include "backend/kernel_compiler/tbe/tbe_convert_utils.h" +#include "backend/kernel_compiler/tbe/tbe_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h similarity index 97% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h index eef02efa87e..768f811055d 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h @@ -25,10 +25,10 @@ #include #include #include "ir/dtype.h" -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" #include "pybind11/stl.h" -#include "kernel/oplib/oplib.h" -#include "kernel/tbe/tbe_adapter.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/kernel_compiler/tbe/tbe_adapter.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_mod.cc similarity index 93% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.cc rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_mod.cc index 0f377940daf..e6cb4cf30dd 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.cc +++ 
b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_mod.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "kernel/tbe/tbe_kernel_mod.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_mod.h" #include #include "runtime/rt.h" -#include "nlohmann/json.hpp" +#include "utils/context/ms_context.h" #include "graphengine/inc/framework/ge_runtime/task_info.h" namespace mindspore { @@ -99,9 +99,9 @@ std::vector TbeKernelMod::GenTask(const std::vector &in MS_LOG(INFO) << "block_dim is:" << block_dim_; - TbeTaskInfoPtr task_info_ptr = - make_shared(stream_id, stub_func, block_dim_, args, 0, sm_desc, nullptr, 0, - meta_data, input_data_addrs, output_data_addrs, workspace_addrs); + TbeTaskInfoPtr task_info_ptr = make_shared( + kernel_name_, stream_id, stub_func, block_dim_, args, 0, sm_desc, nullptr, 0, meta_data, input_data_addrs, + output_data_addrs, workspace_addrs, NeedDump()); return {task_info_ptr}; } diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_mod.h similarity index 95% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.h rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_mod.h index e0e7ab46461..de48c83d9b2 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_mod.h @@ -21,8 +21,8 @@ #include #include #include -#include "kernel/ascend_kernel_mod.h" -#include "kernel/tbe/tbe_utils.h" +#include "backend/kernel_compiler/ascend_kernel_mod.h" +#include "backend/kernel_compiler/tbe/tbe_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc similarity index 96% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.cc rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc index 43d492f3976..48223f40c6c 100644 --- 
a/mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel/tbe/tbe_kernel_parallel_build.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h" #include #include @@ -24,14 +24,14 @@ #include #include "utils/context/ms_context.h" -#include "kernel/tbe/tbe_adapter.h" -#include "kernel/tbe/tbe_kernel_build.h" -#include "kernel/tbe/tbe_kernel_mod.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/tbe/tbe_adapter.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_build.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_mod.h" +#include "backend/session/anf_runtime_algorithm.h" #include "./common.h" -#include "kernel/tbe/tbe_python_funcs.h" -#include "kernel/tbe/tbe_convert_utils.h" -#include "kernel/tbe/tbe_utils.h" +#include "backend/kernel_compiler/tbe/tbe_python_funcs.h" +#include "backend/kernel_compiler/tbe/tbe_convert_utils.h" +#include "backend/kernel_compiler/tbe/tbe_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h similarity index 98% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.h rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h index 637c03bce31..a29469b47c6 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h @@ -21,7 +21,7 @@ #include #include #include -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" #include "pybind11/stl.h" #include namespace mindspore { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/common_utils.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/common_utils.h similarity index 100% rename from 
mindspore/ccsrc/kernel/tbe/tbe_kernel_select/common_utils.h rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/common_utils.h diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.cc similarity index 98% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.cc rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.cc index 8050f02f956..c5e882949b3 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h" #include "utils/utils.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/tbe/tbe_kernel_select/common_utils.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_select/common_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h similarity index 96% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h index af711ddf297..4685df67248 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h @@ -21,7 +21,7 @@ 
#include #include #include "ir/anf.h" -#include "kernel/tbe/tbe_kernel_select/common_utils.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_select/common_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.cc similarity index 84% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.cc rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.cc index 3f8e5b85c37..61aa9dfb91a 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.cc @@ -14,17 +14,16 @@ * limitations under the License. */ -#include "kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h" #include #include #include "utils/utils.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/tbe/tbe_kernel_select/common_utils.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_select/common_utils.h" +#include "backend/kernel_compiler/common_utils.h" namespace mindspore { namespace kernel { -constexpr char kAxis[] = "axis"; -constexpr char kTypeInt32[] = "Int32"; constexpr size_t kInputIndex_0 = 0; constexpr size_t kOutputIndex_0 = 0; constexpr size_t kChannelN = 0; @@ -50,7 +49,7 @@ bool TbeKernelReduceSelecter::GetShapeInfo(SupportFormat *support_format) { // get keep dim attr GetReduceAttrKeepDim(); // get axis attr - GetReduceAttrAxis(); + axis_ = GetReduceAttrAxis(cnode_ptr_); AssignSupportFormat(kOpFormat_DEFAULT, support_format); return true; } @@ -121,31 +120,6 @@ bool TbeKernelReduceSelecter::IsFracZAndC1HWNCoC0Common(const std::string &forma return true; } -void 
TbeKernelReduceSelecter::GetReduceAttrAxis() { - auto primitive = AnfAlgo::GetCNodePrimitive(cnode_ptr_); - MS_EXCEPTION_IF_NULL(primitive); - auto axis = primitive->GetAttr(kAxis); - if (axis == nullptr) { - MS_LOG(INFO) << "This node does't have axie attr."; - return; - } - auto type = axis->type(); - MS_EXCEPTION_IF_NULL(type); - std::vector axis_list; - if (type->ToString() == kTypeInt32) { - axis_list.emplace_back(GetValue(axis)); - } else { - axis_list = GetValue>(axis); - } - for (const auto &elem : axis_list) { - if (elem < 0) { - axis_.emplace_back(input_shape_.size() + elem); - } else { - axis_.emplace_back(IntToSize(elem)); - } - } -} - void TbeKernelReduceSelecter::GetReduceAttrKeepDim() { if (!AnfAlgo::HasNodeAttr(kAttrKeepDims, cnode_ptr_)) { MS_LOG(INFO) << "This node does't have keep_attr."; diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h similarity index 94% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h index e66525fd646..196bb7b06a7 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h @@ -20,7 +20,7 @@ #include #include #include "ir/anf.h" -#include "kernel/tbe/tbe_kernel_select/common_utils.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_select/common_utils.h" namespace mindspore { namespace kernel { class TbeKernelReduceSelecter { @@ -36,7 +36,6 @@ class TbeKernelReduceSelecter { private: bool IsFracZAndC1HWNCoC0Common(const std::string &format, SupportFormat *support_format) const; - void GetReduceAttrAxis(); void GetReduceAttrKeepDim(); void AssignSupportFormat(const std::string &support_format_str, SupportFormat *support_format) const; bool 
Is4DShape(const std::vector &shape) const; @@ -44,7 +43,7 @@ class TbeKernelReduceSelecter { CNodePtr cnode_ptr_; std::vector input_shape_{}; std::vector output_shape_{}; - std::vector axis_{}; + std::vector axis_{}; bool keep_dims_ = false; }; } // namespace kernel diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.cc new file mode 100644 index 00000000000..21f2347629a --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.cc @@ -0,0 +1,622 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.h" +#include +#include +#include +#include +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_build.h" +#include "nlohmann/json.hpp" +#include "utils/context/ms_context.h" +#include "backend/kernel_compiler/tbe/tbe_python_funcs.h" +#include "backend/optimizer/common/helper.h" +#include "backend/kernel_compiler/tbe/tbe_convert_utils.h" +#include "frontend/parallel/ops_info/ops_utils.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_select/common_utils.h" + +namespace mindspore { +namespace kernel { +constexpr auto kName = "name"; +constexpr auto kDtype = "dtype"; +constexpr auto kFormat = "format"; +constexpr auto kPrefixInput = "input"; +constexpr auto kPrefixOutput = "output"; +constexpr char kParamTypeDynamic[] = "dynamic"; +constexpr char kParamTypeRequre[] = "required"; +constexpr char kParamTypeOptional[] = "optional"; +void TbeMetadataInfo(const CNodePtr &kernel_node, std::vector> *kernel_info_list) { + auto tbe_selecter = TbeKernelSelect(kernel_node, kernel_info_list); + tbe_selecter.TbeMetadataInfoEx(); +} + +TbeKernelSelect::TbeKernelSelect(CNodePtr kernel_node, std::vector> *kernel_info_list) + : cnode_ptr_(std::move(kernel_node)), kernel_info_list_(kernel_info_list) {} + +void TbeKernelSelect::TbeMetadataInfoEx() { + MS_EXCEPTION_IF_NULL(cnode_ptr_); + MS_EXCEPTION_IF_NULL(kernel_info_list_); + node_name_ = AnfAlgo::GetCNodeName(cnode_ptr_); + auto op_info_ptr = OpLib::FindOp(node_name_, kTBE); + if (!op_info_ptr) { + MS_LOG(INFO) << "Warning: Cann't find tbe core opinfo, node type: " << node_name_; + return; + } + MS_LOG(INFO) << "Start to tbe metadata info. 
node type: " << node_name_ + << ", node name: " << cnode_ptr_->fullname_with_scope(); + OpPattern pattern = op_info_ptr->op_pattern(); + if (pattern == kCommonPattern) { + GetCommonPatternKernelInfo(*op_info_ptr); + } else if (pattern == kDynamicFormatPattern) { + GetDynamicFormatPatternKernelInfo(*op_info_ptr); + } else if (pattern == kFormatAgnosticPattern) { + GetAgnosticPatternKernelInfo(*op_info_ptr); + } else if (pattern == kBroadcastPattern) { + GetBroadcastPatternKernelInfo(*op_info_ptr); + } else if (pattern == kReducePattern) { + GetReducePatternKernelInfo(*op_info_ptr); + } else { + MS_LOG(INFO) << "Warning: op pattern is invailed."; + } + // check support + FilterInVaildKernelInfo(); + MS_LOG(INFO) << "End get kernel build info size: " << kernel_info_list_->size() << ", after tbe select."; +} + +void TbeKernelSelect::GetCommonPatternKernelInfo(const OpInfo &op_info) { + MS_LOG(INFO) << "start."; + // get dynamic inputs + auto primitive = AnfAlgo::GetCNodePrimitive(cnode_ptr_); + MS_EXCEPTION_IF_NULL(primitive); + std::vector dyn_input_sizes; + if (primitive->HasAttr(kAttrDynInputSizes)) { + dyn_input_sizes = GetValue>(primitive->GetAttr(kAttrDynInputSizes)); + } + // get real input/output num + size_t real_input_tensor_num = AnfAlgo::GetInputTensorNum(cnode_ptr_); + const auto inputs_info = op_info.inputs_ptr(); + size_t real_output_tensor_num = AnfAlgo::GetOutputTensorNum(cnode_ptr_); + const auto outputs_info = op_info.outputs_ptr(); + if (inputs_info.empty() && outputs_info.empty()) { + MS_LOG(EXCEPTION) << "op info input & output is null, please check."; + } + // create kernel build info from opinfo + size_t kernel_build_info_num = + inputs_info.empty() ? 
outputs_info[0]->dtypes().size() : inputs_info[0]->dtypes().size(); + for (size_t kernel_build_info_index = 0; kernel_build_info_index < kernel_build_info_num; ++kernel_build_info_index) { + auto builder = KernelBuildInfo::KernelBuildInfoBuilder(); + SetTbeBuildCommonInfo(op_info, &builder); + std::vector inputs_format; + std::vector inputs_device_type; + std::vector> inputs_reshape_type; + // input + if (!GenBuilderItem(true, kernel_build_info_index, real_input_tensor_num, inputs_info, dyn_input_sizes, + &inputs_format, &inputs_device_type, &inputs_reshape_type)) { + break; + } + builder.SetInputsDeviceType(inputs_device_type); + builder.SetInputsFormat(inputs_format); + builder.SetInputReshapeType(inputs_reshape_type); + // output + std::vector outputs_format; + std::vector outputs_device_type; + std::vector> outputs_reshape_type; + if (!GenBuilderItem(false, kernel_build_info_index, real_output_tensor_num, outputs_info, dyn_input_sizes, + &outputs_format, &outputs_device_type, &outputs_reshape_type)) { + break; + } + builder.SetOutputsDeviceType(outputs_device_type); + builder.SetOutputsFormat(outputs_format); + builder.SetOutputReshapeType(outputs_reshape_type); + kernel_info_list_->emplace_back(builder.Build()); + } + MS_LOG(INFO) << "end."; +} + +void TbeKernelSelect::GetDynamicFormatPatternKernelInfo(const OpInfo &op_info) { + MS_LOG(INFO) << "start."; + // + OpInfo op_info_new; + CreateNewOpInfo(op_info, &op_info_new); + GetCommonPatternKernelInfo(op_info_new); + MS_LOG(INFO) << "end."; +} + +void TbeKernelSelect::GetAgnosticPatternKernelInfo(const OpInfo &op_info) { + MS_LOG(INFO) << "start."; + if (op_info.inputs_ptr().size() != 1) { + MS_LOG(EXCEPTION) << "AgnosticPattern only support one input."; + } + auto format = AnfAlgo::GetPrevNodeOutputFormat(cnode_ptr_, 0); + if (kOpFormatList.find(format) == kOpFormatList.end()) { + MS_LOG(INFO) << "Got the unknown format " << format; + format = kOpFormat_DEFAULT; + } + SupportFormat support_format; + 
SupportFormatItem input_item; + SupportFormatItem output_item; + input_item.assign(op_info.inputs_ptr().size(), format); + output_item.assign(op_info.outputs_ptr().size(), format); + support_format.input_format.emplace_back(input_item); + support_format.output_format.emplace_back(output_item); + PrintSupportedFormat(support_format); + OpInfo op_info_new; + CreateNewOpInfo(op_info, support_format, &op_info_new); + GetCommonPatternKernelInfo(op_info_new); + MS_LOG(INFO) << "end."; +} + +void TbeKernelSelect::GetBroadcastPatternKernelInfo(const OpInfo &op_info) { + MS_LOG(INFO) << "start."; + auto broadcast_selecter = TbeKernelBroadCastSelecter(cnode_ptr_); + SupportFormat support_format; + broadcast_selecter.GetShapeInfo(&support_format); + if (!broadcast_selecter.IsBroadCastSupport5HD(&support_format)) { + MS_LOG(INFO) << "Node(" << node_name_ << ") does not support 5HD."; + } + if (!broadcast_selecter.IsBroadCastSupportFracZ(&support_format)) { + MS_LOG(INFO) << "Node(" << node_name_ << ") does not support FracZ."; + } + if (!broadcast_selecter.IsBroadCastSupportC1HWNCoC0(&support_format)) { + MS_LOG(INFO) << "Node(" << node_name_ << ") does not support C1HWNCoC0."; + } + if (!broadcast_selecter.IsBroadCastSupportFracNZ(&support_format)) { + MS_LOG(INFO) << "Node(" << node_name_ << ") does not support FracNZ."; + } + PrintSupportedFormat(support_format); + OpInfo op_info_new; + CreateNewOpInfo(op_info, support_format, &op_info_new); + GetCommonPatternKernelInfo(op_info_new); + MS_LOG(INFO) << "end."; +} + +void TbeKernelSelect::GetReducePatternKernelInfo(const OpInfo &op_info) { + MS_LOG(INFO) << "start."; + auto reduce_selecter = TbeKernelReduceSelecter(cnode_ptr_); + SupportFormat support_format; + reduce_selecter.GetShapeInfo(&support_format); + if (!reduce_selecter.IsReduceSupport5HD(&support_format)) { + MS_LOG(INFO) << "Node (" << node_name_ << ") reduce not support 5HD."; + } + if (reduce_selecter.IsReduceSupportFracZ(&support_format)) { + MS_LOG(INFO) << 
"Node (" << node_name_ << ") reduce not support FracZ."; + } + if (reduce_selecter.IsReduceSupportC1HWNCoC0(&support_format)) { + MS_LOG(INFO) << "Node (" << node_name_ << ") reduce not support C1HWNCoC0."; + } + if (reduce_selecter.IsReduceSupportFracNZ(&support_format)) { + MS_LOG(INFO) << "Node (" << node_name_ << ") reduce not support FracNZ."; + } + PrintSupportedFormat(support_format); + OpInfo op_info_new; + CreateNewOpInfo(op_info, support_format, &op_info_new); + GetCommonPatternKernelInfo(op_info_new); + MS_LOG(INFO) << "end."; +} + +void TbeKernelSelect::FilterInVaildKernelInfo() { + if (kernel_info_list_->empty()) { + MS_LOG(INFO) << "Warning: get kernel build info failed."; + return; + } + auto kernel_build_info_iter = kernel_info_list_->begin(); + while (kernel_build_info_iter != kernel_info_list_->end()) { + if (!FilterInVaildShape(kernel_build_info_iter)) { + MS_LOG(INFO) << "Filter invaild shape, filter item info: " << (*kernel_build_info_iter)->ToString(); + kernel_build_info_iter = kernel_info_list_->erase(kernel_build_info_iter); + continue; + } + if (!TbeCheckSupported(kernel_build_info_iter)) { + MS_LOG(INFO) << "Check support shape, filter item info: " << (*kernel_build_info_iter)->ToString(); + kernel_build_info_iter = kernel_info_list_->erase(kernel_build_info_iter); + continue; + } + kernel_build_info_iter++; + } +} + +bool TbeKernelSelect::FilterInVaildShape( + const mindspore::kernel::TbeKernelSelect::KernelBuildInfoIter &kernel_build_info_iter) { + MS_EXCEPTION_IF_NULL((*kernel_build_info_iter)); + auto kernel_build_info_inputs_format = (*kernel_build_info_iter)->GetAllInputFormats(); + for (size_t i = 0; i < kernel_build_info_inputs_format.size(); ++i) { + auto shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode_ptr_, i); + auto format = kernel_build_info_inputs_format.at(i); + if (!IsShapeMatchFormat(shape, format)) { + MS_LOG(INFO) << "The " << i << "th input check failed."; + return false; + } + } + auto 
kernel_build_info_outputs_format = (*kernel_build_info_iter)->GetAllOutputFormats(); + for (size_t j = 0; j < kernel_build_info_outputs_format.size(); ++j) { + auto shape = AnfAlgo::GetOutputInferShape(cnode_ptr_, j); + auto format = kernel_build_info_outputs_format.at(j); + if (!IsShapeMatchFormat(shape, format)) { + MS_LOG(INFO) << "The " << j << "th input check failed."; + return false; + } + } + return true; +} + +bool TbeKernelSelect::IsShapeMatchFormat(const std::vector &shape, const std::string &format) { + if (format == kOpFormat_DEFAULT) { + return true; + } + static std::set kServerNotSupportFormat = {kOpFormat_NC1HWC0_C04, kOpFormat_FRACTAL_Z_C04}; + // if format is default, it remarkes support all format + if (kOpFormatList.find(format) == kOpFormatList.end()) { + MS_LOG(EXCEPTION) << "Got the unknown format " << format; + } + // server not support format with C04 suffix + if (std::find(kServerNotSupportFormat.begin(), kServerNotSupportFormat.end(), format) != + kServerNotSupportFormat.end()) { + MS_LOG(INFO) << "Warning: Server not support format with C04 suffix."; + return false; + } + // not support format: + // 1 NDHWC with shape size != 5 + // 2 FRAC_NZ with shape size < 2 + // 3 !NDHWC with shape size > 4 + if ((format == kOpFormat_NDHWC && shape.size() != kShape5dDims) || + (format == kOpFormat_FRAC_NZ && shape.size() < kShape2dDims) || + (format != kOpFormat_NDHWC && shape.size() > kShape4dDims)) { + MS_LOG(INFO) << "Warning: Shape format check failed, format: " << format << ", size: " << shape.size(); + return false; + } + return true; +} + +bool TbeKernelSelect::TbeCheckSupported( + const mindspore::kernel::TbeKernelSelect::KernelBuildInfoIter &kernel_build_info_iter) { + MS_EXCEPTION_IF_NULL((*kernel_build_info_iter)); + static const std::set kCheckSupportedOpType = {parallel::MATMUL, + parallel::BATCHMATMUL, + parallel::TOPK, + parallel::IN_TOPK, + parallel::PACK, + parallel::UNSORTEF_SEGMENT_MIND, + parallel::UNSORTEF_SEGMENT_PRODD, + 
parallel::CAST}; + auto iter = std::find(kCheckSupportedOpType.begin(), kCheckSupportedOpType.end(), node_name_); + if (iter == kCheckSupportedOpType.end()) { + return true; + } + MS_LOG(INFO) << "Check support start."; + // replace kernel_info with current kernel info + auto kernel_build_info_tmp = AnfAlgo::GetSelectKernelBuildInfo(cnode_ptr_); + AnfAlgo::SetSelectKernelBuildInfo(*kernel_build_info_iter, cnode_ptr_.get()); + nlohmann::json kernel_json; + TbeKernelJsonCreator creator(CHECK_SUPPORTED); + bool ret = creator.GenTbeSingleKernelJson(cnode_ptr_, &kernel_json); + if (!ret) { + MS_LOG(EXCEPTION) << "Gen tbe single kernel json for check support failed."; + } + ret = TbePythonFuncs::CheckSupported(kernel_json); + AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_tmp, cnode_ptr_.get()); + return ret; +} + +void TbeKernelSelect::SetTbeBuildCommonInfo(const mindspore::kernel::OpInfo &op_info, + mindspore::kernel::KernelBuildInfo::KernelBuildInfoBuilder *builder) { + MS_EXCEPTION_IF_NULL(builder); + builder->SetProcessor(AICORE); + std::string fusion_type = op_info.fusion_type(); + if (tbe::GetFusionType(fusion_type) != UNKNOWN_FUSION_TYPE) { + builder->SetFusionType(tbe::GetFusionType(fusion_type)); + } + builder->SetOpPattern(op_info.op_pattern()); + builder->SetKernelType(TBE_KERNEL); +} + +bool TbeKernelSelect::GenBuilderItem(bool is_input, size_t kernel_build_info_index, size_t real_io_tensor_num, + const std::vector> &ios_info, + const std::vector &dyn_input_sizes, std::vector *formats, + std::vector *device_types, std::vector> *reshape_types) { + MS_EXCEPTION_IF_NULL(formats); + MS_EXCEPTION_IF_NULL(device_types); + MS_EXCEPTION_IF_NULL(reshape_types); + size_t dynamic_input_index = 0; + size_t real_io_tensor_index = 0; + size_t io_info_index = 0; + size_t io_info_num = ios_info.size(); + for (; io_info_index < io_info_num && real_io_tensor_index < real_io_tensor_num; io_info_index++) { + std::shared_ptr io_info_item = ios_info[io_info_index]; + auto 
kernel_build_info_dtype = io_info_item->dtypes().at(kernel_build_info_index); + std::string kernel_build_info_format; + if (!io_info_item->formats().empty()) { + kernel_build_info_format = io_info_item->formats().at(kernel_build_info_index); + } + std::string io_param_type = io_info_item->param_type(); + std::vector reshape_type; + StringToAxisVector(io_info_item->reshape_type(), &reshape_type); + if (io_param_type == kParamTypeDynamic) { + // dynamic io + if (is_input) { + if (dynamic_input_index >= dyn_input_sizes.size()) { + MS_LOG(EXCEPTION) << "dyn_input_sizes attr set error, dynamic_input_index: " << dynamic_input_index + << ", dyn_input_sizes size: " << dyn_input_sizes.size(); + } + int dynamic_input_size = dyn_input_sizes[dynamic_input_index]; + for (int i = 0; i < dynamic_input_size; ++i) { + device_types->emplace_back(tbe::DtypeToTypeId(kernel_build_info_dtype)); + formats->emplace_back(kernel_build_info_format); + reshape_types->emplace_back(reshape_type); + } + dynamic_input_index++; + real_io_tensor_index += dynamic_input_size; + } else { + if (ios_info.size() != 1) { + MS_LOG(EXCEPTION) << "if output is dynamic, so output must has one output."; + } + for (size_t i = 0; i < real_io_tensor_num; ++i) { + device_types->emplace_back(tbe::DtypeToTypeId(kernel_build_info_dtype)); + formats->emplace_back(kernel_build_info_format); + reshape_types->emplace_back(reshape_type); + } + real_io_tensor_index += real_io_tensor_num; + } + } else if (io_param_type == kParamTypeRequre || io_param_type == kParamTypeOptional) { + // requre or optional io + device_types->emplace_back(tbe::DtypeToTypeId(kernel_build_info_dtype)); + formats->emplace_back(kernel_build_info_format); + reshape_types->emplace_back(reshape_type); + real_io_tensor_index++; + } else { + MS_LOG(EXCEPTION) << "op info's param type is not match: " << io_param_type; + } + } + + if (io_info_index != io_info_num) { + MS_LOG(INFO) << "Warning: io_info_index(" << io_info_index << ") != io_info_num(" << 
io_info_num + << "), this node may has optional input/output."; + } + if (real_io_tensor_index != real_io_tensor_num) { + std::string io_type = is_input ? "inputs " : "outputs"; + MS_LOG(INFO) << node_name_ << "'s " << io_type << "op io info num: " << io_info_num + << ", real io tensor num:" << real_io_tensor_num << "real_io_tensor_index(" << real_io_tensor_index + << ") != real_io_tensor_num(" << real_io_tensor_num << ")"; + return false; + } + return true; +} + +void TbeKernelSelect::StringToAxisVector(const std::string &reshape_type_str, std::vector *reshape_type_vec) { + MS_EXCEPTION_IF_NULL(reshape_type_vec); + for (const auto &c : reshape_type_str) { + switch (c) { + case 'N': + reshape_type_vec->push_back(kernel::N); + break; + case 'C': + reshape_type_vec->push_back(kernel::C); + break; + case 'H': + reshape_type_vec->push_back(kernel::H); + break; + case 'W': + reshape_type_vec->push_back(kernel::W); + break; + default: + MS_LOG(EXCEPTION) << "Unknown axis " << c << "in reshape type."; + } + } +} + +void TbeKernelSelect::CreateNewOpIOInfo(const mindspore::kernel::OpIOInfo &op_io_info, + const std::vector> &support_format_item, size_t index, + mindspore::kernel::OpIOInfo *op_io_info_new) { + MS_EXCEPTION_IF_NULL(op_io_info_new); + op_io_info_new->set_index(op_io_info.index()); + op_io_info_new->set_name(op_io_info.name()); + op_io_info_new->set_param_type(op_io_info.param_type()); + op_io_info_new->set_need_compile(op_io_info.need_compile()); + op_io_info_new->set_reshape_type(op_io_info.reshape_type()); + op_io_info_new->set_shape(op_io_info.shape()); + // dtype + std::vector dtype_new; + auto dtype = op_io_info.dtypes(); + for (size_t i = 0; i < support_format_item.size(); ++i) { + dtype_new.insert(dtype_new.end(), dtype.begin(), dtype.end()); + } + op_io_info_new->set_dtypes(dtype_new); + // format + std::vector format_new; + for (const auto &formats : support_format_item) { + auto format = formats.at(index); + for (size_t j = 0; j < dtype.size(); ++j) { 
+ format_new.emplace_back(format); + } + } + op_io_info_new->set_formats(format_new); +} + +std::vector TbeKernelSelect::SplitStrToVec(const std::string &op_select_json_item) { + const std::map kDynamicFormatMap = { + {"NCHW", "DefaultFormat"}, {"ND", "DefaultFormat"}, {"FRACTAL_Z", "FracZ"}}; + if (op_select_json_item.empty()) { + MS_LOG(EXCEPTION) << "Op select ret item is null."; + } + const char space = ' '; + const char sep = ','; + std::string op_select_tmp = op_select_json_item + ","; + std::vector ret; + auto begin = op_select_tmp.find_first_not_of(space, 0); + auto sep_pos = op_select_tmp.find(sep); + if (begin >= sep_pos) { + MS_LOG(EXCEPTION) << "Select ret json is error."; + } + while (sep_pos != std::string::npos) { + auto obj = op_select_tmp.substr(begin, sep_pos - begin); + if (kDynamicFormatMap.find(obj) != kDynamicFormatMap.end()) { + obj = kDynamicFormatMap.at(obj); + } + ret.emplace_back(obj); + begin = op_select_tmp.find_first_not_of(space, sep_pos + 1); + sep_pos = op_select_tmp.find(sep, begin); + } + return ret; +} + +std::string TbeKernelSelect::OpSelectFormat() { + nlohmann::json kernel_json; + std::string res_json_str; + TbeKernelJsonCreator creator(OP_SELECT_FORMAT); + bool ret = creator.GenTbeSingleKernelJson(cnode_ptr_, &kernel_json); + if (!ret) { + MS_LOG(EXCEPTION) << "GenTbeSingleKernelJson failed."; + } + res_json_str = TbePythonFuncs::OpSelectFormat(kernel_json); + if (res_json_str.empty()) { + MS_LOG(EXCEPTION) << "op select format error."; + } + MS_LOG(INFO) << "Dynamic select foramt response result:" << res_json_str; + return res_json_str; +} + +void TbeKernelSelect::CreateNewOpInfo(const mindspore::kernel::OpInfo &op_info, const SupportFormat &support_format, + mindspore::kernel::OpInfo *op_info_new) { + MS_EXCEPTION_IF_NULL(op_info_new); + if (op_info.inputs_ptr().size() != support_format.input_format[0].size() || + op_info.outputs_ptr().size() != support_format.output_format[0].size()) { + MS_LOG(EXCEPTION) << "BroadCast 
input/output size not match, op info input size:" << op_info.inputs_ptr().size() + << ", input support size: " << support_format.input_format[0].size() + << ", op info output size: " << op_info.outputs_ptr().size() + << ", output support size: " << support_format.output_format[0].size(); + } + *op_info_new = op_info; + op_info_new->ClearInputs(); + op_info_new->ClearOutputs(); + for (size_t i = 0; i < op_info.inputs_ptr().size(); ++i) { + auto input = op_info.inputs_ptr().at(i); + auto input_new = std::make_shared(); + CreateNewOpIOInfo(*input, support_format.input_format, i, input_new.get()); + op_info_new->add_inputs_ptr(input_new); + } + for (size_t j = 0; j < op_info.outputs_ptr().size(); ++j) { + auto output = op_info.outputs_ptr().at(j); + auto output_new = std::make_shared(); + CreateNewOpIOInfo(*output, support_format.output_format, j, output_new.get()); + op_info_new->add_outputs_ptr(output_new); + } +} + +struct SelectOpIOInfo { + std::string name; + std::vector dtypes; + std::vector formats; +}; + +void TbeKernelSelect::CreateNewOpInfo(const mindspore::kernel::OpInfo &op_info, + mindspore::kernel::OpInfo *op_info_new) { + MS_EXCEPTION_IF_NULL(op_info_new); + auto op_seclect_json = OpSelectFormat(); + if (!op_seclect_json.empty()) { + nlohmann::json json_obj = nlohmann::json::parse(op_seclect_json); + if (!json_obj.is_object()) { + MS_LOG(EXCEPTION) << "JsonStr is not an object, the jsonStr is:" << op_seclect_json; + } + std::vector inputs; + std::vector outputs; + for (const auto &item : json_obj.items()) { + const std::string &item_name = item.key(); + bool is_input = (item_name.find(kPrefixInput) != std::string::npos); + bool is_output = (item_name.find(kPrefixOutput) != std::string::npos); + if (!is_input && !is_output) { + MS_LOG(EXCEPTION) << "op select ret json is error."; + } + if (is_input) { + SelectOpIOInfo select_input; + select_input.name = item.value().at(kName); + std::string input_dtype_item = item.value().at(kDtype); + select_input.dtypes 
= SplitStrToVec(input_dtype_item); + std::string input_format_item = item.value().at(kFormat); + select_input.formats = SplitStrToVec(input_format_item); + inputs.emplace_back(select_input); + } else if (is_output) { + SelectOpIOInfo select_output; + select_output.name = item.value().at(kName); + std::string input_dtype_item = item.value().at(kDtype); + select_output.dtypes = SplitStrToVec(input_dtype_item); + std::string input_format_item = item.value().at(kFormat); + select_output.formats = SplitStrToVec(input_format_item); + outputs.emplace_back(select_output); + } + } + + if (op_info.inputs_ptr().size() != inputs.size() || op_info.outputs_ptr().size() != outputs.size()) { + MS_LOG(EXCEPTION) << "select format input/output size not equal, please check register."; + } + + *op_info_new = op_info; + op_info_new->ClearInputs(); + op_info_new->ClearOutputs(); + for (size_t i = 0; i < op_info.inputs_ptr().size(); ++i) { + auto input_new = std::make_shared(); + CreateNewOpIOInfo(*op_info.inputs_ptr().at(i), inputs.at(i).dtypes, inputs.at(i).formats, input_new.get()); + op_info_new->add_inputs_ptr(input_new); + } + for (size_t i = 0; i < op_info.outputs_ptr().size(); ++i) { + auto output_new = std::make_shared(); + CreateNewOpIOInfo(*op_info.outputs_ptr().at(i), outputs.at(i).dtypes, outputs.at(i).formats, output_new.get()); + op_info_new->add_outputs_ptr(output_new); + } + } +} + +void TbeKernelSelect::CreateNewOpIOInfo(const mindspore::kernel::OpIOInfo &op_io_info, + const std::vector &support_dtype, + const std::vector &support_format, + mindspore::kernel::OpIOInfo *op_io_info_new) { + MS_EXCEPTION_IF_NULL(op_io_info_new); + op_io_info_new->set_index(op_io_info.index()); + op_io_info_new->set_name(op_io_info.name()); + op_io_info_new->set_param_type(op_io_info.param_type()); + op_io_info_new->set_need_compile(op_io_info.need_compile()); + op_io_info_new->set_reshape_type(op_io_info.reshape_type()); + op_io_info_new->set_shape(op_io_info.shape()); + // dtype && format 
+ op_io_info_new->set_dtypes(support_dtype); + op_io_info_new->set_formats(support_format); +} + +void TbeKernelSelect::PrintSupportedFormat(const SupportFormat &support_format) { + if (support_format.input_format.size() != support_format.output_format.size()) { + MS_LOG(EXCEPTION) << "Input(" << support_format.input_format.size() << ")Output(" + << support_format.output_format.size() << ") size not match."; + } + for (size_t i = 0; i < support_format.input_format.size(); ++i) { + auto input_items = support_format.input_format.at(i); + auto output_items = support_format.output_format.at(i); + std::string print_str = "["; + for (const auto &input : input_items) { + print_str.append(input); + print_str.append(", "); + } + print_str.append("] -->"); + for (const auto &output : output_items) { + print_str.append(output); + print_str.append(", "); + } + MS_LOG(INFO) << "Support format: " << print_str; + } +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_select.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.h similarity index 95% rename from mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_select.h rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.h index c400bdbb6f8..679c56379f8 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select/tbe_kernel_select.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.h @@ -20,9 +20,9 @@ #include #include #include -#include "kernel/oplib/opinfo.h" -#include "kernel/kernel_build_info.h" -#include "kernel/tbe/tbe_kernel_select/common_utils.h" +#include "backend/kernel_compiler/oplib/opinfo.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_select/common_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_python_funcs.cc 
b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_python_funcs.cc similarity index 98% rename from mindspore/ccsrc/kernel/tbe/tbe_python_funcs.cc rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_python_funcs.cc index 7204fb7f960..facb07991a9 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_python_funcs.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_python_funcs.cc @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "kernel/tbe/tbe_python_funcs.h" -#include "kernel/tbe/tbe_utils.h" +#include "backend/kernel_compiler/tbe/tbe_python_funcs.h" +#include "backend/kernel_compiler/tbe/tbe_utils.h" #include "common/utils.h" #include "utils/context/ms_context.h" diff --git a/mindspore/ccsrc/kernel/tbe/tbe_python_funcs.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_python_funcs.h similarity index 100% rename from mindspore/ccsrc/kernel/tbe/tbe_python_funcs.h rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_python_funcs.h diff --git a/mindspore/ccsrc/kernel/tbe/tbe_utils.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc similarity index 96% rename from mindspore/ccsrc/kernel/tbe/tbe_utils.cc rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc index ae7e5cb6d5e..76ef7b08d52 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_utils.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernel/tbe/tbe_utils.h" +#include "backend/kernel_compiler/tbe/tbe_utils.h" #include #include @@ -27,15 +27,15 @@ #include #include "runtime/kernel.h" -#include "kernel/oplib/oplib.h" +#include "backend/kernel_compiler/oplib/oplib.h" #include "utils/utils.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" -#include "device/kernel_info.h" +#include "runtime/device/kernel_info.h" #include "ir/dtype/type.h" -#include "kernel/tbe/tbe_convert_utils.h" +#include "backend/kernel_compiler/tbe/tbe_convert_utils.h" #include "securec/include/securec.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/kernel/tbe/tbe_utils.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.h similarity index 96% rename from mindspore/ccsrc/kernel/tbe/tbe_utils.h rename to mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.h index 56fbe7967a4..39ddaaa73d6 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_utils.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.h @@ -23,9 +23,9 @@ #include #include -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" #include "ir/anf.h" -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/pre_activate/CMakeLists.txt b/mindspore/ccsrc/backend/optimizer/CMakeLists.txt similarity index 84% rename from mindspore/ccsrc/pre_activate/CMakeLists.txt rename to mindspore/ccsrc/backend/optimizer/CMakeLists.txt index 239757fb177..ee1532a4162 100644 --- a/mindspore/ccsrc/pre_activate/CMakeLists.txt +++ b/mindspore/ccsrc/backend/optimizer/CMakeLists.txt @@ -11,4 +11,4 @@ if (ENABLE_D) endif () set_property(SOURCE ${_PREACTIVATE_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PRE_ACT) -add_library(_mindspore_pre_activate_obj OBJECT 
${_PREACTIVATE_SRC_LIST}) +add_library(_mindspore_backend_optimizer_obj OBJECT ${_PREACTIVATE_SRC_LIST}) diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc new file mode 100644 index 00000000000..64d76ab358e --- /dev/null +++ b/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc @@ -0,0 +1,498 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "backend/optimizer/ascend/ascend_backend_optimization.h" +#include +#include +#include +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ir_fission/bn_split.h" +#include "backend/optimizer/ascend/ir_fission/bn_grad_split.h" +#include "backend/optimizer/ascend/ir_fission/batch_norm_grad_split.h" +#include "backend/optimizer/ascend/ir_fission/batch_norm_bert_fission.h" +#include "backend/optimizer/ascend/ir_fission/single_batch_norm_fission.h" +#include "backend/optimizer/ascend/ir_fission/tensor_scatter_update_fission.h" +#include "backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.h" +#include "backend/optimizer/ascend/ir_fission/layer_norm_grad_split.h" +#include "backend/optimizer/pass/communication_op_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/square_sum_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/clip_by_value_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/confusion_softmax_grad_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_next_mv_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_next_right_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_v2.h" +#include "backend/optimizer/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/reshape_transpose_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/transpose_reshape_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/adam_apply_one_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/adam_apply_one_with_decay_rule.h" +#include "backend/optimizer/ascend/ir_fusion/parameter_and_transop_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/refresh_parameter_format.h" +#include 
"backend/optimizer/ascend/ir_fusion/transpose_transdata_fusion.h" +#include "backend/optimizer/ascend/ir_fission/transdata_split.h" +#include "backend/optimizer/ascend/ir_fission/topk_split.h" +#include "backend/optimizer/ascend/ir_fusion/momentum_lossscale_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/mul_add_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/mul_addn_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/matmul_biasadd_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/remove_reshape_pair.h" +#include "backend/optimizer/ascend/ir_fusion/derelu_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/batchnorm_to_bninfer.h" +#include "backend/optimizer/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h" +#include "backend/optimizer/ascend/ir_fusion/confusion_mul_grad_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/softmax_grad_ext_fusion.h" +#include "backend/optimizer/ascend/format_type/insert_trans_op.h" +#include "backend/optimizer/ascend/format_type/rectify_do_mask_kernel_info.h" +#include "backend/optimizer/ascend/format_type/chang_axis_of_reduce_kernel.h" +#include "backend/optimizer/pass/getitem_tuple.h" +#include "backend/optimizer/pass/optimize_dependence.h" +#include "backend/optimizer/pass/erase_visit_attr.h" +#include "backend/optimizer/ascend/format_type/insert_cast.h" +#include "backend/optimizer/ascend/format_type/convert_unsupported_transnode_to_aicpu.h" +#include "backend/optimizer/pass/eliminate_redundant_op.h" +#include "backend/optimizer/pass/common_subexpression_elimination.h" +#include "backend/optimizer/pass/fuse_graph_kernel.h" +#include "backend/optimizer/pass/fuse_basic.h" +#include "backend/optimizer/pass/add_atomic_clean.h" +#include "backend/optimizer/ascend/format_type/merge_cast_to_op.h" +#include "backend/optimizer/ascend/format_type/check_consistency.h" +#include "backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.h" +#include 
"backend/optimizer/ascend/buffer_fusion/eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/multi_output_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv_single_in_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv_double_in_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/segment_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/format_type/deal_ref_trans_and_cast.h" +#include "backend/optimizer/ascend/enhancer/insert_memcpy_async_for_hccl_op.h" +#include "backend/optimizer/ascend/enhancer/insert_pad_for_nms_with_mask.h" +#include "backend/optimizer/ascend/format_type/insert_transdata_for_runop.h" +#include "backend/optimizer/ascend/enhancer/getnext_memcpy_elimination.h" +#include "backend/optimizer/ascend/ir_fission/addn_fission.h" +#include "backend/optimizer/ascend/enhancer/insert_memcpy_async_for_getnext.h" +#include "backend/optimizer/ascend/ir_fission/batch_norm_grad_infer_fission.h" +#include "backend/optimizer/ascend/ir_fission/split_fission.h" +#include "backend/optimizer/ascend/format_type/modify_ops_attrs.h" +#include "backend/optimizer/ascend/format_type/remove_no_use_reshape_op.h" +#include "backend/optimizer/ascend/ir_fusion/add_input_to_output.h" +#include "utils/context/ms_context.h" +#include 
"utils/config_manager.h" +#include "debug/anf_ir_dump.h" +#include "debug/anf_ir_utils.h" + +namespace mindspore { +namespace opt { +namespace { +void AddAscendBackendOptionalIRFusion(PassManager *ir_fusion_pm) { + MS_EXCEPTION_IF_NULL(ir_fusion_pm); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + 
ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); +} +} // namespace + +void RunOpAscendDataLayout(const std::shared_ptr &kernel_graph) { + MS_EXCEPTION_IF_NULL(kernel_graph); + auto optimizer = std::make_shared(); + auto data_layout_pm = std::make_shared("pynative_transop_pm"); + data_layout_pm->AddPass(std::make_shared()); + data_layout_pm->AddPass(std::make_shared()); + data_layout_pm->AddPass(std::make_shared()); + data_layout_pm->AddPass(std::make_shared()); + data_layout_pm->AddPass(std::make_shared()); + data_layout_pm->AddPass(std::make_shared()); + data_layout_pm->AddPass(std::make_shared()); + data_layout_pm->AddPass(std::make_shared()); + data_layout_pm->AddPass(std::make_shared()); + optimizer->AddPassManager(data_layout_pm); + (void)optimizer->Optimize(kernel_graph); + kernel_graph->SetExecOrderByDefault(); +} + +void AscendGraphKernelCommonProcess(const std::shared_ptr &kernel_graph) { + MS_EXCEPTION_IF_NULL(kernel_graph); + auto optimizer = std::make_shared(); + MS_EXCEPTION_IF_NULL(optimizer); + auto common_process = std::make_shared("graph_kernel_common_process"); + MS_EXCEPTION_IF_NULL(common_process); + common_process->AddPass(std::make_shared()); + common_process->AddPass(std::make_shared()); + optimizer->AddPassManager(common_process); + (void)optimizer->Optimize(kernel_graph); + kernel_graph->SetExecOrderByDefault(); +} + +void AscendDataLayout(const std::shared_ptr &kernel_graph) { + MS_EXCEPTION_IF_NULL(kernel_graph); + auto optimizer = std::make_shared(); + auto data_layout_pm = std::make_shared("transop_pm"); + data_layout_pm->AddPass(std::make_shared()); + 
data_layout_pm->AddPass(std::make_shared()); + data_layout_pm->AddPass(std::make_shared()); + data_layout_pm->AddPass(std::make_shared()); + data_layout_pm->AddPass(std::make_shared()); + data_layout_pm->AddPass(std::make_shared()); + data_layout_pm->AddPass(std::make_shared()); + data_layout_pm->AddPass(std::make_shared()); + data_layout_pm->AddPass(std::make_shared()); + optimizer->AddPassManager(data_layout_pm); + (void)optimizer->Optimize(kernel_graph); + kernel_graph->SetExecOrderByDefault(); +} + +void AscendMixPrecision(const std::shared_ptr &kernel_graph) { + MS_EXCEPTION_IF_NULL(kernel_graph); + auto optimizer = std::make_shared(); + auto mixed_precision_pm = std::make_shared("cast_pm"); + mixed_precision_pm->AddPass(std::make_shared()); + mixed_precision_pm->AddPass(std::make_shared()); + mixed_precision_pm->AddPass(std::make_shared()); + mixed_precision_pm->AddPass(std::make_shared()); + mixed_precision_pm->AddPass(std::make_shared()); + mixed_precision_pm->AddPass(std::make_shared()); + mixed_precision_pm->AddPass(std::make_shared()); + mixed_precision_pm->AddPass(std::make_shared()); + mixed_precision_pm->AddPass(std::make_shared()); + mixed_precision_pm->AddPass(std::make_shared()); + mixed_precision_pm->AddPass(std::make_shared()); + mixed_precision_pm->AddPass(std::make_shared()); + optimizer->AddPassManager(mixed_precision_pm); + (void)optimizer->Optimize(kernel_graph); + kernel_graph->SetExecOrderByDefault(); +} + +void AscendBackendIRFusionOptimization(const std::shared_ptr &kernel_graph) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + bool save_graphs = context_ptr->save_graphs_flag(); + auto save_graphs_path = context_ptr->save_graphs_path(); + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } + if (save_graphs) { + std::string file_path = save_graphs_path + "/" + "hwopt_d_ir_fusion_before" + "_graph_" + + std::to_string(kernel_graph->graph_id()) + ".ir"; + DumpIR(file_path, kernel_graph); + 
DumpIRProto(kernel_graph, "before_hwopt_" + std::to_string(kernel_graph->graph_id())); + } + auto optimizer = std::make_shared(); + auto ir_fusion_pm = std::make_shared("ir_fusion_pm"); + if (context_ptr->execution_mode() == kPynativeMode) { + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + } else { + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + } + ir_fusion_pm->AddPass(std::make_shared()); + if (context_ptr->ir_fusion_flag()) { + AddAscendBackendOptionalIRFusion(ir_fusion_pm.get()); + } + + if (context_ptr->enable_task_sink() && context_ptr->loop_sink_flag() && ConfigManager::GetInstance().iter_num() > 1) { + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + } + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + optimizer->AddPassManager(ir_fusion_pm); + (void)optimizer->Optimize(kernel_graph); + kernel_graph->SetExecOrderByDefault(); + if (save_graphs) { + std::string file_path = + save_graphs_path + "/" + "hwopt_d_ir_fusion_after" + "_graph_" + std::to_string(kernel_graph->graph_id()) + ".ir"; + DumpIR(file_path, kernel_graph); + } +} + +void RunOpAscendBackendIRFusionOptimization(const std::shared_ptr &kernel_graph) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if (!context_ptr->ir_fusion_flag()) { + MS_LOG(INFO) << "IRFusion is not enable, skip"; + return; + } + bool save_graphs = context_ptr->save_graphs_flag(); + auto save_graphs_path = context_ptr->save_graphs_path(); + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } + if (save_graphs) { + std::string file_path = save_graphs_path + "/" + "hwopt_d_ir_fusion_before.ir"; + DumpIR(file_path, kernel_graph); + } + 
auto optimizer = std::make_shared(); + auto ir_fusion_pm = std::make_shared("ir_fusion_pm"); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + + optimizer->AddPassManager(ir_fusion_pm); + (void)optimizer->Optimize(kernel_graph); + kernel_graph->SetExecOrderByDefault(); + if (save_graphs) { + std::string file_path = save_graphs_path + "/" + "hwopt_d_ir_fusion_after.ir"; + DumpIR(file_path, kernel_graph); + } +} + +void AscendBackendOptimization(const std::shared_ptr &kernel_graph) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + bool save_graphs = context_ptr->save_graphs_flag(); + auto save_graphs_path = context_ptr->save_graphs_path(); + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } + if (save_graphs) { + std::string file_path = + save_graphs_path + "/" + "hwopt_d_before" + "_graph_" + std::to_string(kernel_graph->graph_id()) + ".ir"; + DumpIR(file_path, kernel_graph); + } + // data layout optimization + AscendDataLayout(kernel_graph); + // mixed precision optimization + AscendMixPrecision(kernel_graph); + // other optimization + auto optimizer = std::make_shared(); + auto other_pm = std::make_shared("other_pm"); + other_pm->AddPass(std::make_shared()); + other_pm->AddPass(std::make_shared()); + other_pm->AddPass(std::make_shared()); + other_pm->AddPass(std::make_shared()); + other_pm->AddPass(std::make_shared()); + other_pm->AddPass(std::make_shared()); + optimizer->AddPassManager(other_pm); + (void)optimizer->Optimize(kernel_graph); + kernel_graph->SetExecOrderByDefault(); + // buffer fusion + AscendBackendUBFusionOptimization(kernel_graph); + + // other2 optimization + auto optimizer2 = std::make_shared(); + auto other2_pm = std::make_shared("other2_pm"); + 
other2_pm->AddPass(std::make_shared()); + other2_pm->AddPass(std::make_shared()); + if (context_ptr->enable_task_sink() && context_ptr->loop_sink_flag() && ConfigManager::GetInstance().iter_num() > 1) { + other2_pm->AddPass(std::make_shared()); + } + other2_pm->AddPass(std::make_shared()); + optimizer2->AddPassManager(other2_pm); + (void)optimizer2->Optimize(kernel_graph); + kernel_graph->SetExecOrderByDefault(); + + if (save_graphs) { + std::string file_path = + save_graphs_path + "/" + "hwopt_d_end" + "_graph_" + std::to_string(kernel_graph->graph_id()) + ".ir"; + DumpIR(file_path, kernel_graph, true); + DumpIRProto(kernel_graph, "after_hwopt_" + std::to_string(kernel_graph->graph_id())); + kernel_graph->DumpFuncGraph("hwopt_d_end"); + } +} + +void AscendBackendGraphKernelOpt(const std::shared_ptr &kernel_graph, + bool is_before_kernel_select) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if (!(context_ptr->enable_graph_kernel())) { + return; + } + bool save_graphs = context_ptr->save_graphs_flag(); + auto save_graphs_path = context_ptr->save_graphs_path(); + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } + if (save_graphs) { + std::string file_path = save_graphs_path + "/" + "hwopt_d_graph_kernel_opt_before_graph_" + + std::to_string(!is_before_kernel_select) + "_" + std::to_string(kernel_graph->graph_id()) + + ".ir"; + DumpIR(file_path, kernel_graph); + } + + // Fuse graph kernels with basic ops + FuseGraphKernel(kernel_graph, is_before_kernel_select); + + if (save_graphs) { + std::string file_path = save_graphs_path + "/" + "hwopt_d_graph_kernel_opt_end_graph_" + + std::to_string(!is_before_kernel_select) + "_" + std::to_string(kernel_graph->graph_id()) + + ".ir"; + DumpIR(file_path, kernel_graph, true); + } +} + +void AscendBackendFuseBasicOpt(const std::shared_ptr &kernel_graph, + bool is_before_kernel_select) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if 
(!(context_ptr->enable_graph_kernel())) { + return; + } + bool save_graphs = context_ptr->save_graphs_flag(); + auto save_graphs_path = context_ptr->save_graphs_path(); + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } + if (save_graphs) { + std::string file_path = save_graphs_path + "/" + "hwopt_d_fuse_basic_opt_before_graph_" + + std::to_string(!is_before_kernel_select) + "_" + std::to_string(kernel_graph->graph_id()) + + ".ir"; + DumpIR(file_path, kernel_graph, true); + } + + // Fuse basic ops with basic ops + FuseBasic(kernel_graph, is_before_kernel_select); + + if (save_graphs) { + std::string file_path = save_graphs_path + "/" + "hwopt_d_fuse_basic_opt_end_graph_" + + std::to_string(!is_before_kernel_select) + "_" + std::to_string(kernel_graph->graph_id()) + + ".ir"; + DumpIR(file_path, kernel_graph, true); + } +} + +void AscendBackendAddAtomicClean(const std::shared_ptr &kernel_graph) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if (!(context_ptr->enable_graph_kernel())) { + return; + } + bool save_graphs = context_ptr->save_graphs_flag(); + auto save_graphs_path = context_ptr->save_graphs_path(); + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } + if (save_graphs) { + std::string file_path = save_graphs_path + "/" + "hwopt_d_add_atomic_clean_before" + "_graph_" + + std::to_string(kernel_graph->graph_id()) + ".ir"; + DumpIR(file_path, kernel_graph); + } + + AddAtomicClean(kernel_graph); + + if (save_graphs) { + std::string file_path = + save_graphs_path + "/" + "hwopt_d_end" + "_graph_" + std::to_string(kernel_graph->graph_id()) + ".ir"; + DumpIR(file_path, kernel_graph, true); + } +} + +void AscendBackendUBFusionOptimization(const std::shared_ptr &kernel_graph) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if (!context_ptr->ir_fusion_flag()) { + MS_LOG(INFO) << "UBFusion is not enable, skip"; + return; + } + bool save_graphs = 
context_ptr->save_graphs_flag(); + auto save_graphs_path = context_ptr->save_graphs_path(); + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } + if (save_graphs) { + std::string file_path = + save_graphs_path + "/hwopt_d_ub_fusion_before_graph_" + std::to_string(kernel_graph->graph_id()) + ".ir"; + DumpIR(file_path, kernel_graph); + } + auto fusion_id_allocator = std::make_shared(); + MS_EXCEPTION_IF_NULL(fusion_id_allocator); + fusion_id_allocator->Init(); + auto optimizer = std::make_shared(); + auto ub_fusion_pm = std::make_shared("ub_fusion_pm"); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); + ub_fusion_pm->AddPass(std::make_shared()); + optimizer->AddPassManager(ub_fusion_pm); + (void)optimizer->Optimize(kernel_graph); + kernel_graph->SetExecOrderByDefault(); + if (save_graphs) { + std::string file_path = + save_graphs_path + "/hwopt_d_ub_fusion_after_graph_" + std::to_string(kernel_graph->graph_id()) + ".ir"; + DumpIR(file_path, kernel_graph); + } +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.h b/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.h similarity index 98% rename 
from mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.h rename to mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.h index 222c4b90b5a..8194ab467b3 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ASCEND_BACKEND_OPTIMIZATION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ASCEND_BACKEND_OPTIMIZATION_H_ #include -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { void RunOpAscendDataLayout(const std::shared_ptr &kernel_graph); diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_helper.cc b/mindspore/ccsrc/backend/optimizer/ascend/ascend_helper.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ascend_helper.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ascend_helper.cc index 9c498bd7361..fd4c0e59522 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ascend_helper.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ascend_helper.cc @@ -14,18 +14,18 @@ * limitations under the License. 
*/ -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/ascend/ascend_helper.h" #include #include "common/trans.h" #include "common/utils.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" #include "utils/utils.h" -#include "device/kernel_info.h" -#include "kernel/oplib/oplib.h" -#include "kernel/common_utils.h" -#include "operator/ops.h" -#include "session/anf_runtime_algorithm.h" -#include "session/kernel_graph.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/kernel_compiler/common_utils.h" +#include "frontend/operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/session/kernel_graph.h" #include "utils/context/ms_context.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_helper.h b/mindspore/ccsrc/backend/optimizer/ascend/ascend_helper.h similarity index 86% rename from mindspore/ccsrc/pre_activate/ascend/ascend_helper.h rename to mindspore/ccsrc/backend/optimizer/ascend/ascend_helper.h index ad48ca5291a..cb308a09a0b 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ascend_helper.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ascend_helper.h @@ -19,10 +19,10 @@ #include #include #include -#include "device/ascend/kernel_select_ascend.h" -#include "kernel/kernel_query.h" -#include "kernel/oplib/oplib.h" -#include "session/anf_runtime_algorithm.h" +#include "runtime/device/ascend/kernel_select_ascend.h" +#include "backend/kernel_compiler/kernel_query.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { @@ -70,6 +70,21 @@ class KernelQuery { } }; using KernelQueryPtr = std::shared_ptr; + +class OpFinder { + public: + OpFinder() = default; + virtual ~OpFinder() = default; + virtual int GetOpRegisteredOutputNum(const std::string &op_name) { + auto op_info = kernel::OpLib::FindOp(op_name, 
kernel::kTBE); + if (op_info == nullptr) { + return -1; + } + return op_info->outputs_ptr().size(); + } +}; +using OpFinderPtr = std::shared_ptr; + void RefreshKernelBuildInfo(const std::string &input_format, const std::string &output_format, const AnfNodePtr &trans_data, const std::vector &reshape_type = {}); diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc index 94318d63ca9..22183c9050f 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h similarity index 85% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h index 6cdc5885f69..dfc45b4688e 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git 
a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc index 1f7fef9e625..59915d43d4a 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h similarity index 85% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h index b5688f3a36f..abaf264d2e3 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h 
@@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.cc similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.cc index 6091eb572dd..1bfff1b50ea 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h similarity index 85% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h index 7d779d35f8e..6bf74d52681 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" 
namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.cc similarity index 90% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.cc index 963f1885fee..144ab4b53fd 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h similarity index 85% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h index 171352de9b5..93aa324566c 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc similarity index 89% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc index 63e7dcf6b88..a2ebfbe79e2 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc @@ -13,18 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h similarity index 84% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h index 7a06faa6245..224422530b3 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.cc 
b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_double_in_fusion_pass.cc similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_double_in_fusion_pass.cc index a1261438111..1a67e3c39bf 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_double_in_fusion_pass.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv_double_in_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_double_in_fusion_pass.h similarity index 84% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_double_in_fusion_pass.h index 062b8182fbb..911cf744de1 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_double_in_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" 
+#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_single_in_fusion_pass.cc similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_single_in_fusion_pass.cc index d83b32a8889..1eb26b12bc7 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_single_in_fusion_pass.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv_single_in_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_single_in_fusion_pass.h similarity index 84% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_single_in_fusion_pass.h index bf7e581dffd..6dddd600c23 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_single_in_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git 
a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc index 98a6838bedd..285b8f6c07c 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc @@ -13,18 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h similarity index 85% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h index c2e72f26ff2..6746dad9842 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h +++ 
b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/eltwise_fusion_pass.cc similarity index 90% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/eltwise_fusion_pass.cc index 2f04e166921..1e24cce0e47 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/eltwise_fusion_pass.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/eltwise_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/eltwise_fusion_pass.h similarity index 84% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/eltwise_fusion_pass.h index 54ff0f59821..ae636876315 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/eltwise_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.cc 
b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.cc index a516f04442b..27a7a786d13 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.cc @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include #include #include "debug/anf_ir_dump.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h index 8d6eca774c4..dced2c2fa2a 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h @@ -21,11 +21,11 @@ #include #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include 
"backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc similarity index 90% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc index d1ef5dc83b6..7fcc6e45e09 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h similarity index 84% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h index 5baaa6db869..e0d08bb58dc 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/multi_output_fusion_pass.cc similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/multi_output_fusion_pass.cc index be4d2af1cb6..58a219aec73 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/multi_output_fusion_pass.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/multi_output_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/multi_output_fusion_pass.h similarity index 84% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/multi_output_fusion_pass.h index 0e2510128a4..40a45360a1c 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/multi_output_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.cc 
b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/reduce_eltwise_fusion_pass.cc similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/reduce_eltwise_fusion_pass.cc index 623f0e34267..95955818eba 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/reduce_eltwise_fusion_pass.cc @@ -13,18 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h" #include #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h similarity index 84% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h index 42d896e96bd..4d56eee7b33 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include 
"pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/segment_eltwise_fusion_pass.cc similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/segment_eltwise_fusion_pass.cc index 0dcf2362bcb..f2117f93740 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/segment_eltwise_fusion_pass.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/segment_eltwise_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/segment_eltwise_fusion_pass.h similarity index 84% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/segment_eltwise_fusion_pass.h index 41f06ba1f9c..f3b97f83572 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/segment_eltwise_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git 
a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.cc similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.cc index 5bc0fdced7e..d93b47b66c1 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.cc @@ -13,18 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h" #include #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" +#include "backend/optimizer/common/fusion_id_allocator.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h similarity index 85% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h index c6c5fe88dc1..371c2063999 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/ub_pattern_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.cc similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/ub_pattern_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.cc index faa5169c408..96855307052 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/ub_pattern_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h" +#include "backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.h" #include #include #include @@ -23,11 +23,11 @@ #include #include #include -#include "kernel/kernel_fusion.h" +#include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" -#include "device/kernel_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" +#include "runtime/device/kernel_info.h" #include "utils/context/ms_context.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.h similarity index 85% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.h index 7099c92772f..69eb0f43d4e 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.h @@ -19,13 +19,13 @@ #include #include -#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h" #include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/enhancer/getnext_memcpy_elimination.cc b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/getnext_memcpy_elimination.cc 
similarity index 94% rename from mindspore/ccsrc/pre_activate/ascend/enhancer/getnext_memcpy_elimination.cc rename to mindspore/ccsrc/backend/optimizer/ascend/enhancer/getnext_memcpy_elimination.cc index 6d0906363ea..a729cdd0f99 100644 --- a/mindspore/ccsrc/pre_activate/ascend/enhancer/getnext_memcpy_elimination.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/getnext_memcpy_elimination.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "pre_activate/ascend/enhancer/getnext_memcpy_elimination.h" +#include "backend/optimizer/ascend/enhancer/getnext_memcpy_elimination.h" #include -#include "session/anf_runtime_algorithm.h" -#include "optimizer/opt.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/optimizer/opt.h" namespace mindspore::opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/enhancer/getnext_memcpy_elimination.h b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/getnext_memcpy_elimination.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/enhancer/getnext_memcpy_elimination.h rename to mindspore/ccsrc/backend/optimizer/ascend/enhancer/getnext_memcpy_elimination.h index 523fc87a383..365088b34a8 100644 --- a/mindspore/ccsrc/pre_activate/ascend/enhancer/getnext_memcpy_elimination.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/getnext_memcpy_elimination.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_GETNEXT_MEMCPY_ELIMINATION_H #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_GETNEXT_MEMCPY_ELIMINATION_H -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_getnext.cc similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc rename to 
mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_getnext.cc index 01a3f789e76..bac9f54ace0 100644 --- a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_getnext.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.h" +#include "backend/optimizer/ascend/enhancer/insert_memcpy_async_for_getnext.h" #include #include -#include "pre_activate/ascend/ascend_helper.h" -#include "pre_activate/common/helper.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/ascend/ascend_helper.h" +#include "backend/optimizer/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.h b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_getnext.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.h rename to mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_getnext.h index eb3b78d33f9..6fefc322300 100644 --- a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_getnext.h @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_INSERT_MEMCPY_ASYNC_FOR_GETNEXT_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_INSERT_MEMCPY_ASYNC_FOR_GETNEXT_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.cc 
b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_hccl_op.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.cc rename to mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_hccl_op.cc index 63ea59d744c..2585006be63 100644 --- a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_hccl_op.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.h" +#include "backend/optimizer/ascend/enhancer/insert_memcpy_async_for_hccl_op.h" #include #include #include #include "utils/utils.h" -#include "session/anf_runtime_algorithm.h" -#include "optimizer/opt.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/optimizer/opt.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.h b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_hccl_op.h similarity index 94% rename from mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.h rename to mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_hccl_op.h index e2f3b781ed3..7bd730a84d5 100644 --- a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_memcpy_async_for_hccl_op.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_INSERT_MEMCPY_ASYNC_FOR_HCCL_OP_H_ #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/common/optimizer.h" 
+#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_pad_for_nms_with_mask.cc b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_pad_for_nms_with_mask.cc similarity index 89% rename from mindspore/ccsrc/pre_activate/ascend/enhancer/insert_pad_for_nms_with_mask.cc rename to mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_pad_for_nms_with_mask.cc index b73fe6c83c9..be61833fe4c 100644 --- a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_pad_for_nms_with_mask.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_pad_for_nms_with_mask.cc @@ -14,17 +14,17 @@ * limitations under the License. */ -#include "pre_activate/ascend/enhancer/insert_pad_for_nms_with_mask.h" +#include "backend/optimizer/ascend/enhancer/insert_pad_for_nms_with_mask.h" #include #include #include -#include "pre_activate/ascend/ascend_helper.h" -#include "pre_activate/common/helper.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/ascend/ascend_helper.h" +#include "backend/optimizer/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" -#include "device/kernel_info.h" -#include "kernel//oplib/oplib.h" -#include "operator/ops.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler//oplib/oplib.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_pad_for_nms_with_mask.h b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_pad_for_nms_with_mask.h similarity index 93% rename from mindspore/ccsrc/pre_activate/ascend/enhancer/insert_pad_for_nms_with_mask.h rename to mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_pad_for_nms_with_mask.h index bfc201ed116..6aed678ff2a 100644 --- a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_pad_for_nms_with_mask.h +++ 
b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_pad_for_nms_with_mask.h @@ -16,8 +16,8 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_INSERT_PAD_FOR_NMS_WITH_MASK_H #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_INSERT_PAD_FOR_NMS_WITH_MASK_H -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pass.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pass.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/backend/optimizer/ascend/format_type/chang_axis_of_reduce_kernel.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/chang_axis_of_reduce_kernel.cc new file mode 100644 index 00000000000..f508bb28688 --- /dev/null +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/chang_axis_of_reduce_kernel.cc @@ -0,0 +1,103 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "backend/optimizer/ascend/format_type/chang_axis_of_reduce_kernel.h" + +#include +#include +#include +#include + +#include "utils/utils.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "common/utils.h" +#include "backend/kernel_compiler/common_utils.h" + +namespace mindspore { +namespace opt { +namespace { +using ConvertFunction = std::function; + +void ConvertReduceAttrFraczAnd6HD(const CNodePtr &cnode); +const size_t kAxis_H = 2; +const size_t kAxis_W = 3; +const size_t kAxis_6HD_H = 1; +const size_t kAxis_6HD_W = 2; +const std::map kReduceConvertMap = {{kOpFormat_FRAC_Z, ConvertReduceAttrFraczAnd6HD}, + {kOpFormat_C1HWNCoC0, ConvertReduceAttrFraczAnd6HD}}; +void SafeCheckFunction(const CNodePtr &cnode, const std::vector &reduce_axis) { + if (reduce_axis.empty()) { + MS_LOG(EXCEPTION) << "The node " << cnode->DebugString() << "'s reduce axis got a empty vector"; + } + if (AnfAlgo::GetInputTensorNum(cnode) != AnfAlgo::GetOutputTensorNum(cnode) && + AnfAlgo::GetInputTensorNum(cnode) != 1) { + MS_LOG(EXCEPTION) << "the kind of reduce node [" << cnode->DebugString() + << "] is not single input or single output "; + } + for (auto elem : reduce_axis) { + if (elem > 4) { + MS_LOG(INFO) << "reduce axis is larger than 4 dims reduce axis : [" << elem << "]"; + } + } +} + +void ConvertReduceAttrFraczAnd6HD(const CNodePtr &cnode) { + auto axis = kernel::GetReduceAttrAxis(cnode); + std::vector convert_axis; + SafeCheckFunction(cnode, axis); + auto format = AnfAlgo::GetInputFormat(cnode, 0); + if (format != kOpFormat_FRAC_Z || format != kOpFormat_C1HWNCoC0) { + MS_LOG(EXCEPTION) << "The node [" << cnode->DebugString() << "] format " << format << " is not 5hd"; + } + for (auto elem : axis) { + switch (elem) { + case kAxis_H: + convert_axis.emplace_back(kAxis_6HD_H); + break; + case kAxis_W: + convert_axis.emplace_back(kAxis_6HD_W); + break; + default: + MS_LOG(INFO) << "reduce axis is axis : [" << elem << "]" + << " but the format is not supported 
this reduce axis"; + } + } + AnfAlgo::SetNodeAttr(kAttrAxis, MakeValue(convert_axis), cnode); +} +} // namespace + +const BaseRef ChangeAxisOfReduceKernel::DefinePattern() const { + VarPtr X = std::make_shared(); + VarPtr Xs = std::make_shared(); + return VectorRef({X, Xs}); +} + +const AnfNodePtr ChangeAxisOfReduceKernel::Process(const FuncGraphPtr &, const AnfNodePtr &node, + const EquivPtr &) const { + if (node == nullptr || !node->isa() || !AnfAlgo::IsRealKernel(node)) { + return nullptr; + } + if (AnfAlgo::GetOpPattern(node) != kernel::kReducePattern) { + return nullptr; + } + auto convert_map = kReduceConvertMap.find(AnfAlgo::GetInputFormat(node, 0)); + if (convert_map == kReduceConvertMap.end()) { + return nullptr; + } + convert_map->second(node->cast()); + return nullptr; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/optimizer/ascend/format_type/chang_axis_of_reduce_kernel.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/chang_axis_of_reduce_kernel.h new file mode 100644 index 00000000000..6bf1287ae7f --- /dev/null +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/chang_axis_of_reduce_kernel.h @@ -0,0 +1,33 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_CHANGE_AXIS_OF_REDUCE_KENRNEL_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_CHANGE_AXIS_OF_REDUCE_KENRNEL_H_ + +#include "backend/optimizer/common/optimizer.h" + +namespace mindspore { +namespace opt { +class ChangeAxisOfReduceKernel : public PatternProcessPass { + public: + explicit ChangeAxisOfReduceKernel(bool multigraph = true) + : PatternProcessPass("change_axis_of_reduce_kernel", multigraph) {} + ~ChangeAxisOfReduceKernel() override = default; + const BaseRef DefinePattern() const override; + const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; +}; +} // namespace opt +} // namespace mindspore +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_CHANGE_AXIS_OF_REDUCE_KENRNEL_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/check_consistency.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/check_consistency.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/ascend/format_type/check_consistency.cc rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/check_consistency.cc index 7c8fb70fda1..7da00273109 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/check_consistency.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/check_consistency.cc @@ -13,16 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/format_type/check_consistency.h" +#include "backend/optimizer/ascend/format_type/check_consistency.h" #include #include #include #include "utils/utils.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" -#include "kernel/common_utils.h" +#include "backend/kernel_compiler/common_utils.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/check_consistency.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/check_consistency.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/format_type/check_consistency.h rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/check_consistency.h index e134547dc85..bf956895de3 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/check_consistency.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/check_consistency.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_CHECK_CONSISTENCY_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_CHECK_CONSISTENCY_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/convert_unsupported_transnode_to_aicpu.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_unsupported_transnode_to_aicpu.cc similarity index 89% rename from mindspore/ccsrc/pre_activate/ascend/format_type/convert_unsupported_transnode_to_aicpu.cc rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_unsupported_transnode_to_aicpu.cc index c0f99ed4159..48948dca06e 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/convert_unsupported_transnode_to_aicpu.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_unsupported_transnode_to_aicpu.cc @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#include "pre_activate/ascend/format_type/convert_unsupported_transnode_to_aicpu.h" +#include "backend/optimizer/ascend/format_type/convert_unsupported_transnode_to_aicpu.h" #include -#include "session/anf_runtime_algorithm.h" -#include "kernel/kernel_build_info.h" -#include "kernel/kernel_query.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/kernel_compiler/kernel_query.h" namespace mindspore { namespace opt { const BaseRef ConvertUnSupportNodeToAICPU::DefinePattern() const { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/convert_unsupported_transnode_to_aicpu.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_unsupported_transnode_to_aicpu.h similarity index 93% rename from mindspore/ccsrc/pre_activate/ascend/format_type/convert_unsupported_transnode_to_aicpu.h rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_unsupported_transnode_to_aicpu.h index 80cc8170ace..e534a851ad4 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/convert_unsupported_transnode_to_aicpu.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_unsupported_transnode_to_aicpu.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ascend_helper.h" #ifndef MINDSPORE_CONVERT_UNSUPPORTED_NODE_TO_AICPU_H #define MINDSPORE_CONVERT_UNSUPPORTED_NODE_TO_AICPU_H namespace mindspore { diff --git a/mindspore/ccsrc/backend/optimizer/ascend/format_type/deal_ref_trans_and_cast.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/deal_ref_trans_and_cast.cc new file mode 100644 index 00000000000..4375a08031a --- /dev/null +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/deal_ref_trans_and_cast.cc @@ -0,0 +1,226 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/optimizer/ascend/format_type/deal_ref_trans_and_cast.h" +#include +#include +#include +#include +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/session/kernel_graph.h" +#include "backend/optimizer/common/helper.h" + +namespace mindspore { +namespace opt { +namespace { +session::KernelWithIndex FindRefOriginNode(const AnfNodePtr &node) { + session::KernelWithIndex kernel_with_index = AnfAlgo::VisitKernel(node, 0); + AnfNodePtr cur_node = kernel_with_index.first; + size_t cur_out_index = kernel_with_index.second; + MS_EXCEPTION_IF_NULL(cur_node); + if (cur_node->isa()) { + auto cnode = cur_node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + std::string op_name = AnfAlgo::GetCNodeName(cnode); + auto op_info = mindspore::kernel::OpLib::FindOp(op_name, kernel::kTBE); + // deal ref op + if (op_info != nullptr && op_info->is_ref()) { + auto ref_infos = op_info->ref_infos(); + if (ref_infos.count(cur_out_index) != 0) { + auto in_index = ref_infos.at(cur_out_index); + if (in_index > cnode->inputs().size()) { + MS_LOG(EXCEPTION) << "ref op has wrong inputs: op inputs num is " << cnode->inputs().size() + << ", ref info is " << cur_out_index; + } + AnfNodePtr next_node = cnode->input(in_index + 1); + return FindRefOriginNode(next_node); + } + } + + // deal special (trans,cast,reshape) op + if (op_name == prim::kPrimCast->name() || op_name == prim::kPrimTranspose->name() || + op_name == prim::kPrimReshape->name() || op_name == kTransDataOpName) { + AnfNodePtr next_node = cnode->input(1); + return FindRefOriginNode(next_node); + } + } + + return kernel_with_index; +} + +void AddRefPairToKernelGraph(const FuncGraphPtr &func_graph, const CNodePtr &cnode, const AnfNodePtr &get_item, + const AnfNodePtr &final_node, size_t final_index, + const session::KernelWithIndex &origin_pair) { + // record the ref_pair + auto kernel_graph = func_graph->cast(); + MS_EXCEPTION_IF_NULL(kernel_graph); + // 
if the final node is get item, means no trans or cast op is added, the final node is itself + // so add the pair for itself, because the get item will removed later + auto final_ref = (final_node == get_item ? cnode : final_node); + session::AnfWithOutIndex final_pair = std::make_pair(final_ref, final_index); + if (kernel_graph->IsInRefOutputMap(final_pair)) { + MS_LOG(EXCEPTION) << "ref_pair is already in ref map, node is " << final_ref->DebugString() << ", index is " + << final_index; + } + MS_LOG(DEBUG) << "Add Ref pair, final {node ptr " << final_pair.first.get() << " , info is " + << final_pair.first->DebugString() << " , index is " << final_pair.second << "}, origin {node ptr " + << origin_pair.first.get() << ", info is " << origin_pair.first->DebugString() << " : index " + << origin_pair.second << "}"; + kernel_graph->AddRefCorrespondPairs(final_pair, origin_pair); +} + +// if get_item is nullptr, the additional node will link to the cnode +// else the additional node will link to the get_item node (the get_item node link to cnode) +AnfNodePtr AddAdditionalToRefOutput(const FuncGraphPtr &func_graph, const CNodePtr &cnode, size_t output_index, + size_t input_index, const AnfNodePtr &get_item) { + AnfNodePtr final_node = (get_item == nullptr ? 
cnode : get_item); + size_t final_index = output_index; + AnfNodePtr input_node = AnfAlgo::GetInputNode(cnode, input_index); + session::KernelWithIndex origin_pair; + origin_pair = FindRefOriginNode(input_node); + MS_EXCEPTION_IF_NULL(origin_pair.first); + if (!origin_pair.first->isa()) { + MS_LOG(WARNING) << "ref op origin node is not parameter"; + } + MS_LOG(DEBUG) << "DealRefTransAndCast the node input index " << input_index << ", find origin op is " + << origin_pair.first->DebugString() << ", index is " << origin_pair.second; + auto origin_format = AnfAlgo::GetOutputFormat(origin_pair.first, origin_pair.second); + auto origin_type = AnfAlgo::GetOutputDeviceDataType(origin_pair.first, origin_pair.second); + auto cur_format = AnfAlgo::GetOutputFormat(cnode, output_index); + auto cur_type = AnfAlgo::GetOutputDeviceDataType(cnode, output_index); + auto cur_shape = AnfAlgo::GetOutputInferShape(cnode, output_index); + // insert trans + if (origin_format != cur_format && cur_shape.size() > 1) { + auto kernel_select = std::make_shared(); + final_node = NewTransOpNode(func_graph, final_node, kernel_select, false, prim::KPrimTransData->name()); + RefreshKernelBuildInfo(cur_format, origin_format, final_node); + final_index = 0; + MS_EXCEPTION_IF_NULL(final_node); + MS_LOG(INFO) << "DealRefTransAndCast add trans op, op debug info is " << final_node->DebugString(); + } + // insert cast + if (origin_type != cur_type) { + final_node = + AddCastOpNodeToGraph(func_graph, final_node, origin_format, cur_type, origin_type, cur_shape, cur_type); + MS_EXCEPTION_IF_NULL(final_node); + final_node->set_scope(cnode->scope()); + final_index = 0; + MS_LOG(INFO) << "DealRefTransAndCast add cast op, op debug info is " << final_node->DebugString(); + } + // add ref pair + AddRefPairToKernelGraph(func_graph, cnode, get_item, final_node, final_index, origin_pair); + // insert depend + if (origin_format != cur_format || origin_type != cur_type) { + std::vector 
depend_nodes{NewValueNode(prim::kPrimDepend), cnode, final_node}; + final_node = func_graph->NewCNode(depend_nodes); + MS_LOG(INFO) << "DealRefTransAndCast add denpend, op debug info is " << final_node->DebugString(); + } + + return final_node; +} +AnfNodePtr DealRefForMultipleOutput(const FuncGraphPtr &func_graph, const CNodePtr &cnode, + const std::shared_ptr &op_info) { + MS_EXCEPTION_IF_NULL(op_info); + auto ref_infos = op_info->ref_infos(); + std::vector make_tuple_inputs; + AbstractBasePtrList abstract_list; + make_tuple_inputs.push_back(NewValueNode(prim::kPrimMakeTuple)); + for (size_t output_index = 0; output_index < AnfAlgo::GetOutputTensorNum(cnode); ++output_index) { + AnfNodePtr final_node = CreatTupleGetItemNode(func_graph, cnode, output_index); + // deal with ref output + if (ref_infos.count(output_index) != 0) { + auto input_index = ref_infos.at(output_index); + final_node = AddAdditionalToRefOutput(func_graph, cnode, output_index, input_index, final_node); + } + MS_EXCEPTION_IF_NULL(final_node); + abstract_list.push_back(final_node->abstract()); + make_tuple_inputs.push_back(final_node); + } + MS_EXCEPTION_IF_NULL(func_graph); + AnfNodePtr make_tuple = func_graph->NewCNode(make_tuple_inputs); + MS_EXCEPTION_IF_NULL(make_tuple); + make_tuple->set_abstract(std::make_shared(abstract_list)); + return make_tuple; +} + +AnfNodePtr DealRefSigleOutput(const FuncGraphPtr &func_graph, const CNodePtr &cnode, + const std::shared_ptr &op_info) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(op_info); + auto ref_infos = op_info->ref_infos(); + for (const auto &ref_info : ref_infos) { + if (ref_info.second > cnode->inputs().size()) { + MS_LOG(EXCEPTION) << "ref op has wrong inputs: op inputs num is " << cnode->inputs().size() << ", ref info is " + << ref_info.second; + } + return AddAdditionalToRefOutput(func_graph, cnode, ref_info.first, ref_info.second, nullptr); + } + return nullptr; +} +} // namespace + +const BaseRef 
DealRefTransAndCast::DefinePattern() const { + VarPtr V = std::make_shared(UnVisited); + VarPtr Xs = std::make_shared(); + return VectorRef({V, Xs}); +} + +void DealBroadCastAsRef(const FuncGraphPtr &func_graph, const CNodePtr &cnode) { + if (AnfAlgo::GetCNodeName(cnode) == kBroadcastOpName) { + auto input_size = AnfAlgo::GetInputTensorNum(cnode); + for (size_t i = 0; i < input_size; ++i) { + auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(cnode, i); + auto input_node = input_node_with_index.first; + MS_EXCEPTION_IF_NULL(input_node); + MS_LOG(INFO) << "origin node:" << input_node->fullname_with_scope(); + AddRefPairToKernelGraph(func_graph, cnode, nullptr, cnode, i, input_node_with_index); + } + } +} + +const AnfNodePtr DealRefTransAndCast::Process(const FuncGraphPtr &graph, const AnfNodePtr &node, + const EquivPtr &) const { + if (node == nullptr || !node->isa()) { + return nullptr; + } + AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), node); + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (!AnfAlgo::IsRealCNodeKernel(cnode)) { + return nullptr; + } + + DealBroadCastAsRef(graph, cnode); + + auto op_name = AnfAlgo::GetCNodeName(cnode); + auto op_info = mindspore::kernel::OpLib::FindOp(op_name, kernel::kTBE); + if (op_info == nullptr || !op_info->is_ref()) { + return nullptr; + } + if (op_info->is_ref()) { + auto type = cnode->Type(); + MS_EXCEPTION_IF_NULL(type); + if (!type->isa()) { + return DealRefSigleOutput(graph, cnode, op_info); + } else { + return DealRefForMultipleOutput(graph, cnode, op_info); + } + } + return nullptr; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/deal_ref_trans_and_cast.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/deal_ref_trans_and_cast.h similarity index 89% rename from mindspore/ccsrc/pre_activate/ascend/format_type/deal_ref_trans_and_cast.h rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/deal_ref_trans_and_cast.h 
index 1b54a7b111d..cb3b13dc496 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/deal_ref_trans_and_cast.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/deal_ref_trans_and_cast.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_DEAL_REF_TRANS_AND_CAST_H_ #include "ir/anf.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pattern_engine.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pattern_engine.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_cast.cc similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast.cc rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_cast.cc index 3d09233d996..c3f79006452 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_cast.cc @@ -13,22 +13,22 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/format_type/insert_cast.h" +#include "backend/optimizer/ascend/format_type/insert_cast.h" #include #include #include #include -#include "device/kernel_info.h" -#include "pre_activate/ascend/ascend_helper.h" -#include "pre_activate/common/helper.h" -#include "kernel/kernel_build_info.h" -#include "kernel/oplib/oplib.h" -#include "session/anf_runtime_algorithm.h" -#include "session/kernel_graph.h" +#include "runtime/device/kernel_info.h" +#include "backend/optimizer/ascend/ascend_helper.h" +#include "backend/optimizer/common/helper.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/session/kernel_graph.h" #include "utils/utils.h" -#include "kernel/common_utils.h" +#include "backend/kernel_compiler/common_utils.h" namespace mindspore { namespace opt { @@ -181,15 +181,6 @@ const AnfNodePtr InsertCast::Process(const FuncGraphPtr &func_graph, const AnfNo if (AnfAlgo::IsGraphKernel(node)) { return ProcessGraphKernelOp(func_graph, node); - } else { - // insert cast for single op. - AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), node); - // process input - CNodePtr cnode = node->cast(); - MS_EXCEPTION_IF_NULL(cnode); - auto new_node = InsertCastForInput(func_graph, cnode); - // process output - return InsertCastForOutput(func_graph, new_node, std::vector(AnfAlgo::GetOutputTensorNum(new_node), true)); } // insert cast for single op. 
AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), node); diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_cast.h similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast.h rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_cast.h index a7f93ec8f37..19c282aac9b 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_cast.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_cast.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_INSERT_CAST_H_ #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pattern_engine.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pattern_engine.h" #include "ir/anf.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_trans_op.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_trans_op.cc similarity index 90% rename from mindspore/ccsrc/pre_activate/ascend/format_type/insert_trans_op.cc rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_trans_op.cc index 3f77c68f861..a22a1faa5fd 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_trans_op.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_trans_op.cc @@ -14,14 +14,14 @@ * limitations under the License. 
*/ -#include "pre_activate/ascend/format_type/insert_trans_op.h" +#include "backend/optimizer/ascend/format_type/insert_trans_op.h" #include #include #include "utils/utils.h" -#include "pre_activate/ascend/ascend_helper.h" -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_info.h" -#include "kernel/oplib/oplib.h" +#include "backend/optimizer/ascend/ascend_helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/oplib/oplib.h" #include "utils/context/ms_context.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_trans_op.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_trans_op.h similarity index 90% rename from mindspore/ccsrc/pre_activate/ascend/format_type/insert_trans_op.h rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_trans_op.h index eb6cfa95427..0b213753270 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_trans_op.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_trans_op.h @@ -20,9 +20,9 @@ #include #include #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_transdata_for_runop.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transdata_for_runop.cc similarity index 84% rename from mindspore/ccsrc/pre_activate/ascend/format_type/insert_transdata_for_runop.cc rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transdata_for_runop.cc index 3df513a19f2..d0b92b250da 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_transdata_for_runop.cc +++ 
b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transdata_for_runop.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "pre_activate/ascend/format_type/insert_transdata_for_runop.h" +#include "backend/optimizer/ascend/format_type/insert_transdata_for_runop.h" #include #include "utils/utils.h" -#include "pre_activate/ascend/ascend_helper.h" -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_info.h" -#include "kernel/oplib/oplib.h" +#include "backend/optimizer/ascend/ascend_helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/oplib/oplib.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_transdata_for_runop.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transdata_for_runop.h similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/format_type/insert_transdata_for_runop.h rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transdata_for_runop.h index f699cdd580d..82ff5f2b9a9 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/insert_transdata_for_runop.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transdata_for_runop.h @@ -20,9 +20,9 @@ #include #include #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/merge_cast_to_op.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/merge_cast_to_op.cc similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/format_type/merge_cast_to_op.cc rename to 
mindspore/ccsrc/backend/optimizer/ascend/format_type/merge_cast_to_op.cc index b1817cec3dc..88e9fa77b83 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/merge_cast_to_op.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/merge_cast_to_op.cc @@ -14,16 +14,16 @@ * limitations under the License. */ -#include "pre_activate/ascend/format_type/merge_cast_to_op.h" +#include "backend/optimizer/ascend/format_type/merge_cast_to_op.h" #include #include #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/merge_cast_to_op.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/merge_cast_to_op.h similarity index 90% rename from mindspore/ccsrc/pre_activate/ascend/format_type/merge_cast_to_op.h rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/merge_cast_to_op.h index 7e05c8a02af..d0e467b7a3a 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/merge_cast_to_op.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/merge_cast_to_op.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_MERGE_CAST_TO_OP_H #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/modify_ops_attrs.cc similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.cc rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/modify_ops_attrs.cc 
index 42061957b90..adca536f049 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/modify_ops_attrs.cc @@ -14,14 +14,14 @@ * limitations under the License. */ -#include "pre_activate/ascend/format_type/modify_ops_attrs.h" +#include "backend/optimizer/ascend/format_type/modify_ops_attrs.h" #include #include #include "utils/utils.h" -#include "pre_activate/common/helper.h" -#include "kernel/common_utils.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/optimizer/common/helper.h" +#include "backend/kernel_compiler/common_utils.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/modify_ops_attrs.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.h rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/modify_ops_attrs.h index 25ec94b6b4f..f5608db05ad 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/modify_ops_attrs.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/modify_ops_attrs.h @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_MODIFY_OPS_ATTRS_H #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_MODIFY_OPS_ATTRS_H -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/rectify_do_mask_kernel_info.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/rectify_do_mask_kernel_info.cc similarity index 62% rename from mindspore/ccsrc/pre_activate/ascend/format_type/rectify_do_mask_kernel_info.cc rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/rectify_do_mask_kernel_info.cc 
index d81a8c90cea..91b9326cc14 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/rectify_do_mask_kernel_info.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/rectify_do_mask_kernel_info.cc @@ -14,18 +14,19 @@ * limitations under the License. */ -#include "pre_activate/ascend/format_type/rectify_do_mask_kernel_info.h" +#include "backend/optimizer/ascend/format_type/rectify_do_mask_kernel_info.h" #include #include #include #include -#include "session/anf_runtime_algorithm.h" -#include "kernel/kernel_build_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/kernel_build_info.h" #include "utils/utils.h" -#include "kernel/common_utils.h" +#include "backend/kernel_compiler/common_utils.h" #include "utils/context/ms_context.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { @@ -50,16 +51,11 @@ const AnfNodePtr RectifyDoMaskKernelInfo::Process(const FuncGraphPtr &graph, con return nullptr; } std::vector do_mask_node_list; - auto manager = graph->manager(); - MS_EXCEPTION_IF_NULL(manager); - auto node_map = manager->node_users(); - auto iter = node_map.find(node); - if (iter == node_map.end()) { - MS_LOG(EXCEPTION) << "Cannot find the node " << node->DebugString() << " in the graph manager!"; - } - auto gen_mask_output_nodes = iter->second; - for (const auto &output_node : gen_mask_output_nodes) { + auto gen_mask_output_nodes = GetRealNodeUsedList(graph, cnode); + MS_EXCEPTION_IF_NULL(gen_mask_output_nodes); + for (const auto &output_node : *gen_mask_output_nodes) { if (AnfAlgo::GetCNodeName(output_node.first) == prim::kPrimDropoutDoMask->name()) { + MS_EXCEPTION_IF_NULL(output_node.first); auto output_cnode = output_node.first->cast(); do_mask_node_list.push_back(output_cnode); } @@ -76,11 +72,12 @@ const AnfNodePtr RectifyDoMaskKernelInfo::Process(const FuncGraphPtr &graph, con << " GenMask " << node->DebugString(); } } - RectifyKernelInfo(do_mask_node_list); + 
RectifyKernelInfo(do_mask_node_list, graph); return nullptr; } -void RectifyDoMaskKernelInfo::RectifyKernelInfo(const std::vector &do_mask_node_list) const { +void RectifyDoMaskKernelInfo::RectifyKernelInfo(const std::vector &do_mask_node_list, + const FuncGraphPtr &graph) const { std::map format_counter; std::string special_format; std::string convert_format; @@ -94,17 +91,6 @@ void RectifyDoMaskKernelInfo::RectifyKernelInfo(const std::vector &do_ } else { format_counter[do_mask_data_format] = format_counter[do_mask_data_format] + 1; } - // if has two or more special format we need change all domask's format to default that can avoid insert more - // transdata - if (format_counter.size() > 2) { - convert_format = kOpFormat_DEFAULT; - break; - } - if (kHWSpecialFormatSet.find(do_mask_data_format) != kHWSpecialFormatSet.end() && - special_format != do_mask_data_format) { - convert_format = kOpFormat_DEFAULT; - break; - } } if (format_counter.size() == 1) { return; @@ -112,17 +98,23 @@ void RectifyDoMaskKernelInfo::RectifyKernelInfo(const std::vector &do_ if (convert_format.empty()) { convert_format = GetConvertFormat(format_counter); } - RectifyDropOutDoMaskKernelInfo(do_mask_node_list, convert_format); + RectifyDropOutDoMaskKernelInfo(do_mask_node_list, convert_format, graph); } std::string RectifyDoMaskKernelInfo::GetConvertFormat(const std::map &format_counter) const { - std::string convert_format; - const size_t counter = 0; + std::string convert_format = kOpFormat_DEFAULT; + size_t counter = 0; + if (format_counter.size() > 2) { + return kOpFormat_DEFAULT; + } + if (format_counter.size() == 2 && format_counter.find(kOpFormat_DEFAULT) == format_counter.end()) { + return kOpFormat_DEFAULT; + } for (const auto &iter : format_counter) { if (counter < iter.second) { convert_format = iter.first; - } - if (counter == iter.second && kHWSpecialFormatSet.find(convert_format) == kHWSpecialFormatSet.end()) { + counter = iter.second; + } else if (counter == iter.second && 
kHWSpecialFormatSet.find(iter.first) != kHWSpecialFormatSet.end()) { convert_format = iter.first; } } @@ -130,13 +122,17 @@ std::string RectifyDoMaskKernelInfo::GetConvertFormat(const std::map &do_mask_node_list, - const std::string &format) const { + const std::string &format, + const FuncGraphPtr &graph) const { for (const auto &do_mask : do_mask_node_list) { - auto builder = - std::make_shared(AnfAlgo::GetSelectKernelBuildInfo(do_mask)); - builder->SetInputFormat(format, 0); - builder->SetOutputFormat(format, 0); - AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), do_mask.get()); + if (AnfAlgo::GetInputFormat(do_mask, 0) != format) { + auto builder = + std::make_shared(AnfAlgo::GetSelectKernelBuildInfo(do_mask)); + builder->SetInputFormat(format, 0); + builder->SetOutputFormat(format, 0); + AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), do_mask.get()); + ReSelecChildNodeKernelInfo(do_mask, graph); + } } } @@ -159,5 +155,30 @@ AnfNodePtr RectifyDoMaskKernelInfo::RectifyKernelInfoInPynativeProcess(const Anf } return nullptr; } + +void RectifyDoMaskKernelInfo::ReSelecChildNodeKernelInfo(const CNodePtr &cnode, const FuncGraphPtr &graph) const { + MS_EXCEPTION_IF_NULL(cnode); + auto output_node_list = GetRealNodeUsedList(graph, cnode); + MS_EXCEPTION_IF_NULL(output_node_list); + for (const auto &out_node_info : *output_node_list) { + MS_EXCEPTION_IF_NULL(out_node_info.first); + auto out_node = out_node_info.first->cast(); + if (AnfAlgo::IsRealKernel(out_node_info.first)) { + auto ori_build_info = AnfAlgo::GetSelectKernelBuildInfo(out_node); + kernel_selecter->SelectKernel(out_node); + auto new_build_info = AnfAlgo::GetSelectKernelBuildInfo(out_node); + MS_EXCEPTION_IF_NULL(new_build_info); + MS_EXCEPTION_IF_NULL(ori_build_info); + if ((*new_build_info) != (*ori_build_info)) { + ReSelecChildNodeKernelInfo(out_node, graph); + } + } else if (AnfAlgo::GetCNodeName(out_node) == prim::kPrimTupleGetItem->name() || + AnfAlgo::GetCNodeName(out_node) == 
prim::kPrimDepend->name()) { + ReSelecChildNodeKernelInfo(out_node, graph); + } else { + MS_LOG(INFO) << "Reselected the node " << cnode->DebugString() << " failed"; + } + } +} } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/rectify_do_mask_kernel_info.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/rectify_do_mask_kernel_info.h similarity index 78% rename from mindspore/ccsrc/pre_activate/ascend/format_type/rectify_do_mask_kernel_info.h rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/rectify_do_mask_kernel_info.h index 81bad4d8f89..cc9333a0130 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/rectify_do_mask_kernel_info.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/rectify_do_mask_kernel_info.h @@ -19,23 +19,28 @@ #include #include #include +#include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { class RectifyDoMaskKernelInfo : public PatternProcessPass { public: explicit RectifyDoMaskKernelInfo(bool multigraph = true) - : PatternProcessPass("batch_norm_bert_fission", multigraph) {} + : PatternProcessPass("batch_norm_bert_fission", multigraph), kernel_selecter(std::make_shared()) {} ~RectifyDoMaskKernelInfo() override = default; const BaseRef DefinePattern() const override; const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; private: - void RectifyKernelInfo(const std::vector &do_mask_node_list) const; + void RectifyKernelInfo(const std::vector &do_mask_node_list, const FuncGraphPtr &graph) const; AnfNodePtr RectifyKernelInfoInPynativeProcess(const AnfNodePtr &node) const; std::string GetConvertFormat(const std::map &format_counter) const; - void RectifyDropOutDoMaskKernelInfo(const std::vector &do_mask_node_list, const std::string &format) const; + void 
RectifyDropOutDoMaskKernelInfo(const std::vector &do_mask_node_list, const std::string &format, + const FuncGraphPtr &graph) const; + void ReSelecChildNodeKernelInfo(const CNodePtr &cnode, const FuncGraphPtr &graph) const; + KernelSelectPtr kernel_selecter; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/remove_no_use_reshape_op.cc similarity index 88% rename from mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.cc rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/remove_no_use_reshape_op.cc index dde40a50900..09992005a4f 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/remove_no_use_reshape_op.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "pre_activate/ascend/format_type/remove_no_use_reshape_op.h" +#include "backend/optimizer/ascend/format_type/remove_no_use_reshape_op.h" #include #include -#include "pre_activate/common/helper.h" -#include "kernel/common_utils.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/optimizer/common/helper.h" +#include "backend/kernel_compiler/common_utils.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/remove_no_use_reshape_op.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.h rename to mindspore/ccsrc/backend/optimizer/ascend/format_type/remove_no_use_reshape_op.h index 4942c2fc082..135f11f52c8 100644 --- a/mindspore/ccsrc/pre_activate/ascend/format_type/remove_no_use_reshape_op.h +++ 
b/mindspore/ccsrc/backend/optimizer/ascend/format_type/remove_no_use_reshape_op.h @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_REMOVE_NO_USE_RESHAPE_OP_H #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_FORMAT_TYPE_REMOVE_NO_USE_RESHAPE_OP_H -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/addn_fission.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/addn_fission.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/addn_fission.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/addn_fission.cc index b9a86f7bcb8..a3fd704bc53 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/addn_fission.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/addn_fission.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fission/addn_fission.h" +#include "backend/optimizer/ascend/ir_fission/addn_fission.h" #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/addn_fission.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/addn_fission.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/addn_fission.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/addn_fission.h index 3c62391f9a5..e04cdfdf7b6 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/addn_fission.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/addn_fission.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_ADDN_FISSION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_ADDN_FISSION_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_bert_fission.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_bert_fission.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_bert_fission.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_bert_fission.cc index e6a8864e468..f0edefd5f5d 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_bert_fission.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_bert_fission.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fission/batch_norm_bert_fission.h" +#include "backend/optimizer/ascend/ir_fission/batch_norm_bert_fission.h" #include #include #include -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_bert_fission.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_bert_fission.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_bert_fission.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_bert_fission.h index fc214817fc4..23f0e56035e 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_bert_fission.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_bert_fission.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BATCH_NORM_BERT_FISSION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BATCH_NORM_BERT_FISSION_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_infer_fission.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_infer_fission.cc index 5e411116607..97c67e44415 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_infer_fission.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission.h" +#include "backend/optimizer/ascend/ir_fission/batch_norm_grad_infer_fission.h" #include -#include "pre_activate/common/helper.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_infer_fission.h similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_infer_fission.h index a8eefdaa852..97100de2849 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_infer_fission.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BATCH_NORM_GRAD_INFER_FISSION_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_split.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_split.cc index 270b02cb00e..97122386c61 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_split.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fission/batch_norm_grad_split.h" +#include "backend/optimizer/ascend/ir_fission/batch_norm_grad_split.h" #include #include @@ -22,9 +22,9 @@ #include "utils/utils.h" #include "utils/context/ms_context.h" #include "common/utils.h" -#include "pre_activate/common/helper.h" -#include "device/kernel_info.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" +#include "runtime/device/kernel_info.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_split.h similarity index 93% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_split.h index e539fdb27cd..e5378d83326 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/batch_norm_grad_split.h @@ -16,8 +16,8 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BATCH_NORM_GRAD_SPLIT_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BATCH_NORM_GRAD_SPLIT_H_ -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_grad_split.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_grad_split.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_grad_split.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_grad_split.cc index 6282ed4f760..6c4e226120d 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_grad_split.cc +++ 
b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_grad_split.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fission/bn_grad_split.h" +#include "backend/optimizer/ascend/ir_fission/bn_grad_split.h" #include #include @@ -22,9 +22,9 @@ #include "utils/utils.h" #include "utils/context/ms_context.h" #include "common/utils.h" -#include "pre_activate/common/helper.h" -#include "device/kernel_info.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" +#include "runtime/device/kernel_info.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_grad_split.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_grad_split.h similarity index 93% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_grad_split.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_grad_split.h index 17e1f9b98e7..6fe78d47246 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_grad_split.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_grad_split.h @@ -16,8 +16,8 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BN_GRAD_SPLIT_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BN_GRAD_SPLIT_H_ -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_split.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_split.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_split.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_split.cc index 66ffa24bf12..33670e5703a 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_split.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_split.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fission/bn_split.h" +#include "backend/optimizer/ascend/ir_fission/bn_split.h" #include #include @@ -21,9 +21,9 @@ #include "utils/utils.h" #include "utils/context/ms_context.h" -#include "pre_activate/common/helper.h" -#include "device/kernel_info.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" +#include "runtime/device/kernel_info.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_split.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_split.h similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_split.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_split.h index bc5975af175..4340ba0af6d 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/bn_split.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/bn_split.h @@ -16,8 +16,8 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BN_SPLIT_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BN_SPLIT_H_ -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/lars_v2_fission.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/lars_v2_fission.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/lars_v2_fission.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/lars_v2_fission.cc index 479e00e4c0e..e8a778b36f6 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/ir_fission/lars_v2_fission.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/lars_v2_fission.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fission/lars_v2_fission.h" +#include "backend/optimizer/ascend/ir_fission/lars_v2_fission.h" #include #include -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" #include "utils/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/lars_v2_fission.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/lars_v2_fission.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/lars_v2_fission.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/lars_v2_fission.h index 846d221c538..3a165f2b297 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/lars_v2_fission.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/lars_v2_fission.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_LARS_V2_FISSION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_LARS_V2_FISSION_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/layer_norm_grad_split.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/layer_norm_grad_split.cc index 1a25d836509..1d19def787f 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.cc +++ 
b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/layer_norm_grad_split.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fission/layer_norm_grad_split.h" +#include "backend/optimizer/ascend/ir_fission/layer_norm_grad_split.h" #include #include #include -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_info.h" #include "ir/primitive.h" #include "common/utils.h" #include "utils/utils.h" diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/layer_norm_grad_split.h similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/layer_norm_grad_split.h index f442446b01a..c1501b15933 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/layer_norm_grad_split.h @@ -18,9 +18,9 @@ #include #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/single_batch_norm_fission.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/single_batch_norm_fission.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/single_batch_norm_fission.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/single_batch_norm_fission.cc index 159be2ac3b4..133d51734fe 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/ir_fission/single_batch_norm_fission.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/single_batch_norm_fission.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fission/single_batch_norm_fission.h" +#include "backend/optimizer/ascend/ir_fission/single_batch_norm_fission.h" #include #include #include -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/single_batch_norm_fission.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/single_batch_norm_fission.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/single_batch_norm_fission.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/single_batch_norm_fission.h index 145603132b2..fb641c12d62 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/single_batch_norm_fission.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/single_batch_norm_fission.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_SINGLE_BATCH_NORM_FISSION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_SINGLE_BATCH_NORM_FISSION_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/split_fission.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/split_fission.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/split_fission.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/split_fission.cc index c39a5e01e69..063f81a1ca2 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/ir_fission/split_fission.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/split_fission.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fission/split_fission.h" +#include "backend/optimizer/ascend/ir_fission/split_fission.h" #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { @@ -82,6 +82,9 @@ void CreateOutputShapeAndTypeId(const CNodePtr &origin_cnode, int split_dim, int MS_EXCEPTION_IF_NULL(new_type_ids); MS_EXCEPTION_IF_NULL(new_output_shapes); auto output_shape = AnfAlgo::GetOutputInferShape(origin_cnode, 0); + if (split_dim < 0) { + split_dim += output_shape.size(); + } output_shape[split_dim] = split_size; TypeId type_id = AnfAlgo::GetOutputInferDataType(origin_cnode, 0); for (int i = 0; i < num_split; ++i) { @@ -97,6 +100,9 @@ void SetAttrAndAbstractForBaseSplitv(const CNodePtr &origin_cnode, const CNodePt std::vector> base_output_shapes_base; auto output_shape = AnfAlgo::GetOutputInferShape(origin_cnode, 0); TypeId type_id = AnfAlgo::GetOutputInferDataType(origin_cnode, 0); + if (split_dim < 0) { + split_dim += output_shape.size(); + } for (int i = 0; i < num_split; ++i) { output_shape[split_dim] = size_splits_base[i]; base_output_shapes_base.emplace_back(output_shape); diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/split_fission.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/split_fission.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/split_fission.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/split_fission.h index c2763bb7141..6428a21e730 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/split_fission.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/split_fission.h @@ -16,7 +16,7 @@ #ifndef 
MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_SPLIT_FISSION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_SPLIT_FISSION_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/tensor_scatter_update_fission.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/tensor_scatter_update_fission.cc similarity index 94% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/tensor_scatter_update_fission.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/tensor_scatter_update_fission.cc index 6e6cea5ae55..c9a879e9219 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/tensor_scatter_update_fission.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/tensor_scatter_update_fission.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fission/tensor_scatter_update_fission.h" +#include "backend/optimizer/ascend/ir_fission/tensor_scatter_update_fission.h" #include #include -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/tensor_scatter_update_fission.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/tensor_scatter_update_fission.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/tensor_scatter_update_fission.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/tensor_scatter_update_fission.h index 0ada93ac708..0f7efb029c0 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/tensor_scatter_update_fission.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/tensor_scatter_update_fission.h @@ -16,7 
+16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TENSOR_SCATTER_UPDATE_FISSION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TENSOR_SCATTER_UPDATE_FISSION_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/topk_split.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/topk_split.cc index c8477353f99..6eeb7a61f7e 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/topk_split.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fission/topk_split.h" +#include "backend/optimizer/ascend/ir_fission/topk_split.h" #include #include #include #include -#include "pre_activate/common/helper.h" -#include "kernel/kernel_build_info.h" +#include "backend/optimizer/common/helper.h" +#include "backend/kernel_compiler/kernel_build_info.h" #include "utils/utils.h" -#include "session/kernel_graph.h" -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_info.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_info.h" #include "utils/context/ms_context.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/topk_split.h similarity index 93% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/topk_split.h index e7293e1fa39..e005a83a2f7 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/topk_split.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TOPK_SPLIT_H_ #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/transdata_split.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/transdata_split.cc index bfb7e50486c..057cf8deed7 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/transdata_split.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fission/transdata_split.h" +#include "backend/optimizer/ascend/ir_fission/transdata_split.h" #include -#include "pre_activate/ascend/ascend_helper.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/ascend/ascend_helper.h" +#include "backend/session/anf_runtime_algorithm.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/transdata_split.h similarity index 86% rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fission/transdata_split.h index f450897db14..bc681944c3c 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/transdata_split.h @@ -20,12 +20,12 @@ #include #include -#include "pre_activate/common/pass.h" +#include "backend/optimizer/common/pass.h" #include "ir/func_graph.h" #include "ir/anf.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_fusion.cc similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_fusion.cc index 59be003b150..189ac94546a 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_fusion.cc @@ -13,9 +13,8 @@ * See the License for the specific language 
governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/adam_apply_one_fusion.h" -#include "pre_activate/common/helper.h" - +#include "backend/optimizer/ascend/ir_fusion/adam_apply_one_fusion.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { AnfNodePtr AdamApplyOneFusion::CreateAdamApplyOneNode(const FuncGraphPtr &func_graph, const EquivPtr &equiv) const { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_fusion.h similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_fusion.h index 5ee8a86cfbe..683a345cdbc 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_fusion.h @@ -19,7 +19,7 @@ #include #include #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" #include "utils/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_with_decay_rule.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_with_decay_rule.cc index f6077c95f25..b1afa338d4d 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_with_decay_rule.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.h" +#include "backend/optimizer/ascend/ir_fusion/adam_apply_one_with_decay_rule.h" #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_with_decay_rule.h similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_with_decay_rule.h index 742295dd9c4..2d599a8cc95 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/adam_apply_one_with_decay_rule.h @@ -19,7 +19,7 @@ #include #include #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" #include "utils/utils.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/add_input_to_output.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/add_input_to_output.cc new file mode 100644 index 00000000000..cc58d2b0574 --- /dev/null +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/add_input_to_output.cc @@ -0,0 +1,115 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/optimizer/ascend/ir_fusion/add_input_to_output.h" +#include +#include +#include "backend/optimizer/ascend/ir_fusion/input_to_output_registry.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/oplib/oplib.h" + +namespace mindspore { +namespace opt { +namespace { +void GetInputOrOutputNames(const CNodePtr &cnode, const std::string &attr_name, std::vector *names_vec) { + MS_EXCEPTION_IF_NULL(names_vec); + auto primitive = AnfAlgo::GetCNodePrimitive(cnode); + MS_EXCEPTION_IF_NULL(primitive); + ValuePtr names_value = primitive->GetAttr(attr_name); + if (names_value == nullptr) { + return; + } + *names_vec = GetValue>(names_value); +} + +void AddOutputs(const CNodePtr &cnode, const std::vector &input_indices) { + MS_EXCEPTION_IF_NULL(cnode); + std::vector input_names_vec; + GetInputOrOutputNames(cnode, kAttrInputNames, &input_names_vec); + std::vector output_names_vec; + GetInputOrOutputNames(cnode, kAttrOutputNames, &output_names_vec); + AbstractBasePtrList abstract_list; + auto origin_abstract = cnode->abstract(); + MS_EXCEPTION_IF_NULL(origin_abstract); + if (origin_abstract->isa()) { + auto origin_abstract_tuple = dyn_cast(origin_abstract); + MS_EXCEPTION_IF_NULL(origin_abstract_tuple); + AbstractBasePtrList origin_abstract_list = origin_abstract_tuple->elements(); + (void)std::copy(origin_abstract_list.begin(), origin_abstract_list.end(), std::back_inserter(abstract_list)); + } else { + abstract_list.emplace_back(origin_abstract); + } + + for (size_t i = 0; i < input_indices.size(); ++i) 
{ + size_t index = input_indices[i]; + if (index + 1 >= cnode->inputs().size()) { + MS_LOG(INFO) << "The input index " << index << " for converting to output is out of range, " + << "node: " << cnode->DebugString(); + continue; + } + auto node_to_output = cnode->input(index + 1); + MS_EXCEPTION_IF_NULL(node_to_output); + abstract_list.emplace_back(node_to_output->abstract()); + if (!input_names_vec.empty() && !output_names_vec.empty() && index < input_names_vec.size()) { + output_names_vec.emplace_back(input_names_vec[index]); + } + } + if (!output_names_vec.empty()) { + AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(output_names_vec), cnode); + } + auto abstract_tuple = std::make_shared(abstract_list); + cnode->set_abstract(abstract_tuple); +} +} // namespace + +const AnfNodePtr AddInputToOutput::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, + const EquivPtr &) const { + if (node == nullptr || !AnfAlgo::IsRealCNodeKernel(node)) { + return nullptr; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + std::string op_name = AnfAlgo::GetCNodeName(cnode); + InputToOutputRegister reg; + if (!InputToOutputRegistry::Instance().GetRegisterByOpName(op_name, ®)) { + return nullptr; + } + int output_num = op_finder_->GetOpRegisteredOutputNum(op_name); + // No need add output when it is not a tbe op. + if (output_num == -1) { + return nullptr; + } + // No need add output if the output num matches the registered output num for tbe. + if (AnfAlgo::GetOutputTensorNum(cnode) >= IntToSize(output_num)) { + return nullptr; + } + bool is_origin_tuple_output = AnfAlgo::IsTupleOutput(cnode); + AddOutputs(cnode, reg.input_indices()); + // No need to create tuple_getitem if the origin output is a tuple because there has already been some tuple_getitems + // pointed to the outputs. 
+ if (is_origin_tuple_output) { + return nullptr; + } + std::vector new_outputs; + auto new_abstract_tuple = dyn_cast(cnode->abstract()); + MS_EXCEPTION_IF_NULL(new_abstract_tuple); + CreateMultipleOutputsOfAnfNode(func_graph, cnode, new_abstract_tuple->size(), &new_outputs); + if (new_outputs.size() != new_abstract_tuple->size()) { + MS_LOG(EXCEPTION) << "Failed to create outputs of " << cnode->DebugString(); + } + return new_outputs[0]; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/add_input_to_output.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/add_input_to_output.h new file mode 100644 index 00000000000..6e5560bfb0f --- /dev/null +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/add_input_to_output.h @@ -0,0 +1,39 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_ADD_INPUT_TO_OUTPUT_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_ADD_INPUT_TO_OUTPUT_H_ + +#include +#include +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ascend_helper.h" + +namespace mindspore { +namespace opt { +class AddInputToOutput : public PatternProcessPass { + public: + explicit AddInputToOutput(bool multigraph = true) + : PatternProcessPass("add_input_to_output", multigraph), op_finder_(std::make_shared()) {} + ~AddInputToOutput() override = default; + const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; + + private: + OpFinderPtr op_finder_; +}; +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_ADD_INPUT_TO_OUTPUT_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnorm_to_bninfer.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnorm_to_bninfer.cc similarity index 94% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnorm_to_bninfer.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnorm_to_bninfer.cc index 1a62b7a5bef..51bcd880cd8 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnorm_to_bninfer.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnorm_to_bninfer.cc @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fusion/batchnorm_to_bninfer.h" +#include "backend/optimizer/ascend/ir_fusion/batchnorm_to_bninfer.h" #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "utils/utils.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pre_activate/common/helper.h" +#include "frontend/operator/ops.h" +#include "abstract/abstract_value.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnorm_to_bninfer.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnorm_to_bninfer.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnorm_to_bninfer.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnorm_to_bninfer.h index 551fe0f6f90..46872aa959c 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnorm_to_bninfer.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnorm_to_bninfer.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_BATCHNORM_TO_BNINFER_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnormgrad_to_bninfergrad.cc similarity index 94% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnormgrad_to_bninfergrad.cc index 424d3a12c16..defb0113962 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnormgrad_to_bninfergrad.cc @@ -13,15 +13,15 @@ * See the License for the 
specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h" +#include "backend/optimizer/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h" #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "utils/utils.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pre_activate/common/helper.h" +#include "frontend/operator/ops.h" +#include "abstract/abstract_value.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h index 020dc1a9998..0676f8a040c 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_BATCHNORMGRAD_TO_BNINFERGRAD_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.cc index 2af3afbf195..1d89bfd388a 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.cc @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h" #include #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "common/utils.h" #include "utils/utils.h" diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h index 126480603e8..9282b75527a 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h @@ -18,7 +18,7 @@ #include #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_value_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_value_fusion.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_value_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_value_fusion.cc index df94e897ec0..e1b0cb81e3b 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_value_fusion.cc +++ 
b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_value_fusion.cc @@ -13,16 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/clip_by_value_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/clip_by_value_fusion.h" #include #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "utils/utils.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_value_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_value_fusion.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_value_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_value_fusion.h index 309b7cedd00..05bf713bdd4 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/clip_by_value_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/clip_by_value_fusion.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_CLIP_BY_VALUE_FUSION_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_mul_grad_fusion.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_mul_grad_fusion.cc index d49b2d47f36..6ccf3e29bde 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_mul_grad_fusion.cc @@ -13,17 +13,17 
@@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/confusion_mul_grad_fusion.h" #include #include #include #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "utils/utils.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pre_activate/common/helper.h" +#include "abstract/abstract_value.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_mul_grad_fusion.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_mul_grad_fusion.h index 170df5b0e42..932f0d28902 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_mul_grad_fusion.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_CONFUSION_MUL_GRAD_FUSION_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_softmax_grad_rule.cc similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_softmax_grad_rule.cc index 9e2c6374ce9..a8cf0af4657 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.cc +++ 
b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_softmax_grad_rule.cc @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.h" +#include "backend/optimizer/ascend/ir_fusion/confusion_softmax_grad_rule.h" #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "utils/utils.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_softmax_grad_rule.h similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_softmax_grad_rule.h index a4d0d1ce7aa..e3a86e22c9a 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/confusion_softmax_grad_rule.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_CONFUSION_SOFTMAX_GRAD_RULE_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/derelu_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/derelu_fusion.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/derelu_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/derelu_fusion.cc index 2f3c998bb8f..0fe042dc4e0 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/derelu_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/derelu_fusion.cc @@ -13,14 +13,14 @@ * See 
the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/derelu_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/derelu_fusion.h" #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "utils/utils.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pre_activate/common/helper.h" +#include "abstract/abstract_value.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/derelu_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/derelu_fusion.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/derelu_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/derelu_fusion.h index e1811f4db43..7506960ecbb 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/derelu_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/derelu_fusion.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_DERELU_FUSION_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.cc similarity index 99% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.cc index efc9ee79348..dbff0374f3c 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under 
the License. */ -#include "pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.h" #include #include -#include "pre_activate/common/helper.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.h similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.h index f476e960627..b3bbedc36e7 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.h @@ -19,7 +19,7 @@ #include #include #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" #include "utils/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/input_to_output_registry.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/input_to_output_registry.cc new file mode 100644 index 00000000000..2fb42f9bd6a --- /dev/null +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/input_to_output_registry.cc @@ -0,0 +1,122 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/optimizer/ascend/ir_fusion/input_to_output_registry.h" +#include +#include "utils/utils.h" +#include "backend/session/anf_runtime_algorithm.h" + +namespace mindspore { +namespace opt { +namespace { +bool ApplyRMSPropPreCheck(const CNodePtr &node) { + return !(AnfAlgo::GetPrevNodeOutputInferDataType(node, 0) != kNumberTypeFloat32); +} + +bool FusedMulApplyMomentumPreCheck(const CNodePtr &node) { + TypeId data_type = AnfAlgo::GetPrevNodeOutputInferDataType(node, 0); + return !(data_type != kNumberTypeFloat32 && data_type != kNumberTypeFloat16); +} + +bool SparseApplyRMSPropPreCheck(const CNodePtr &node) { + return !(AnfAlgo::GetPrevNodeOutputInferDataType(node, 0) != kNumberTypeFloat32); +} + +bool ApplyAdagradV2PreCheck(const CNodePtr &node) { + TypeId data_type = AnfAlgo::GetPrevNodeOutputInferDataType(node, 0); + return !(data_type != kNumberTypeFloat32 && data_type != kNumberTypeFloat16); +} + +bool ApplyKerasMomentumPreCheck(const CNodePtr &node) { + TypeId data_type = AnfAlgo::GetPrevNodeOutputInferDataType(node, 0); + return !(data_type != kNumberTypeFloat32 && data_type != kNumberTypeFloat16); +} + +bool SparseApplyFtrlPreCheck(const CNodePtr &node) { + return !(AnfAlgo::GetPrevNodeOutputInferDataType(node, 0) != kNumberTypeFloat32); +} + +bool SparseApplyFtrlV2PreCheck(const CNodePtr &node) { + return !(AnfAlgo::GetPrevNodeOutputInferDataType(node, 0) != kNumberTypeFloat32); +} + +bool SparseApplyAdagradV2PreCheck(const CNodePtr &node) { + return !(AnfAlgo::GetPrevNodeOutputInferDataType(node, 0) != kNumberTypeFloat32); +} + +bool SparseApplyAdadeltaPreCheck(const CNodePtr &node) { + return !(AnfAlgo::GetPrevNodeOutputInferDataType(node, 0) != kNumberTypeFloat32); +} +} // namespace +InputToOutputRegistry::InputToOutputRegistry() { + Register(kApplyRMSPropOpName, {1, 2}, ApplyRMSPropPreCheck); + 
Register(kFusedMulApplyMomentumOpName, {1}, FusedMulApplyMomentumPreCheck); + Register(kApplyAdagradOpName, {1}); + Register(kApplyAdagradDAName, {1, 2}); + Register(kApplyAdadeltaOpName, {1, 2}); + Register(kApplyPowerSignOpName, {1}); + Register(kApplyProximalAdagradOpName, {1}); + Register(kApplyAdaMaxOpName, {1, 2}); + Register(kApplyAdagradV2OpName, {1}, ApplyAdagradV2PreCheck); + Register(kApplyKerasMomentumOpName, {1}, ApplyKerasMomentumPreCheck); + Register(kSparseApplyFtrlOpName, {1, 2}, SparseApplyFtrlPreCheck); + Register(kSparseApplyFtrlV2OpName, {1, 2}, SparseApplyFtrlV2PreCheck); + Register(kSparseApplyAdagradV2OpName, {1}, SparseApplyAdagradV2PreCheck); + Register(kSparseApplyProximalAdagradOpName, {1}); + Register(kSparseApplyAdagradOpName, {1}); + Register(kApplyFtrlV2OpName, {1, 2}); + Register(kApplyMomentumOpName, {1}); + Register(kApplyFtrlOpName, {1, 2}); + Register(kApplyAdamOpName, {1, 2}); + Register(kApplyCenteredRMSPropOpName, {1, 2, 3}); + Register(kApplyAddSignOpName, {1}); + Register(kSparseApplyRMSPropOpName, {1, 2}, SparseApplyRMSPropPreCheck); + Register(kSparseApplyAdadeltaOpName, {1, 2}, SparseApplyAdadeltaPreCheck); + Register(kApplyAdamWithAmsgradOpName, {1, 2}); +} + +InputToOutputRegistry &InputToOutputRegistry::Instance() { + static InputToOutputRegistry instance; + return instance; +} + +void InputToOutputRegistry::Register(const InputToOutputRegister ®) { + auto op_name = reg.op_name(); + if (op_input_to_output_map_.find(op_name) == op_input_to_output_map_.end()) { + (void)op_input_to_output_map_.insert(make_pair(op_name, reg)); + MS_LOG(DEBUG) << op_name << " input2output register successfully!"; + } +} + +void InputToOutputRegistry::Register(const std::string &op_name, const std::vector &input_indices, + const PreCheckFunc &pre_check_func) { + if (op_input_to_output_map_.find(op_name) == op_input_to_output_map_.end()) { + InputToOutputRegister reg(op_name, pre_check_func); + reg.set_input_indices(input_indices); + 
(void)op_input_to_output_map_.insert(make_pair(op_name, reg)); + MS_LOG(DEBUG) << op_name << " input2output register successfully!"; + } +} + +bool InputToOutputRegistry::GetRegisterByOpName(const std::string &op_name, InputToOutputRegister *reg) const { + if (op_input_to_output_map_.find(op_name) != op_input_to_output_map_.end()) { + *reg = op_input_to_output_map_.at(op_name); + MS_LOG(DEBUG) << op_name << " input2output find in registry."; + return true; + } + return false; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/input_to_output_registry.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/input_to_output_registry.h new file mode 100644 index 00000000000..45738c289c9 --- /dev/null +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/input_to_output_registry.h @@ -0,0 +1,64 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_IR_FUSION_INPUT_TO_OUTPUT_REGISTRY_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_IR_FUSION_INPUT_TO_OUTPUT_REGISTRY_H_ +#include +#include +#include +#include +#include "ir/anf.h" +#include "common/utils.h" + +namespace mindspore { +namespace opt { +using PreCheckFunc = std::function; +class InputToOutputRegister { + public: + explicit InputToOutputRegister( + const std::string &op_name = "", const PreCheckFunc &pre_check_func = [](const CNodePtr &node) { return true; }) + : op_name_(op_name), pre_check_func_(pre_check_func) {} + virtual ~InputToOutputRegister() = default; + + void set_input_indices(const std::vector &input_indices) { input_indices_ = input_indices; } + + const std::vector &input_indices() const { return input_indices_; } + const std::string &op_name() const { return op_name_; } + + private: + std::string op_name_; + std::vector input_indices_; + PreCheckFunc pre_check_func_; +}; + +class InputToOutputRegistry { + public: + static InputToOutputRegistry &Instance(); + void Register(const InputToOutputRegister ®); + void Register( + const std::string &op_name, const std::vector &input_indices, + const PreCheckFunc &pre_check_func = [](const CNodePtr &node) { return true; }); + bool GetRegisterByOpName(const std::string &op_name, InputToOutputRegister *reg) const; + + private: + InputToOutputRegistry(); + ~InputToOutputRegistry() = default; + DISABLE_COPY_AND_ASSIGN(InputToOutputRegistry) + std::unordered_map op_input_to_output_map_; +}; +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_IR_FUSION_INPUT_TO_OUTPUT_REGISTRY_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_rule.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_rule.cc similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_rule.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_rule.cc index 
42e37df3e49..fd9fd31f12f 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_rule.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_rule.cc @@ -14,16 +14,16 @@ * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/lamb_next_mv_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_next_mv_rule.h" #include #include #include #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" -#include "pre_activate/common/helper.h" -#include "operator/ops.h" +#include "backend/optimizer/common/helper.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_rule.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_rule.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_rule.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_rule.h index 0089c33f870..d14ce6e3fef 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_rule.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_rule.h @@ -23,9 +23,9 @@ #include #include #include "ir/anf.h" -#include "pre_activate/common/pattern_engine.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/pattern_engine.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_rule.cc similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_rule.cc 
index 0e3cd28a665..4ef3fa269f8 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_rule.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h" #include -#include "session/anf_runtime_algorithm.h" -#include "optimizer/opt.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/optimizer/opt.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h index 5d619751971..23114c37eee 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h @@ -19,8 +19,8 @@ #include #include #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.cc similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.cc index 26828f2137c..f21433b3c64 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.cc @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.h" #include #include #include #include -#include "session/anf_runtime_algorithm.h" -#include "optimizer/opt.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/optimizer/opt.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.h index ff14a253ddc..58f05c37ba8 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.h @@ -18,8 +18,8 @@ #include #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_right_rule.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_right_rule.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_right_rule.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_right_rule.cc index 5065c4c5bab..03bc1e04849 100644 --- 
a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_right_rule.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_right_rule.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/lamb_next_right_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_next_right_rule.h" #include -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_right_rule.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_right_rule.h similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_right_rule.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_right_rule.h index 3d15001da24..67687cc037e 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_next_right_rule.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_next_right_rule.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_LAMB_NEXT_RIGHT_RULE_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" #include "utils/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.cc index b5b6d2bb085..8e38c3cc2eb 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.cc @@ -13,13 +13,13 @@ * See the License for the specific 
language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h" #include #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "common/utils.h" #include "utils/utils.h" diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h index cb3939549ff..5ea01ccf659 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_LAMB_UPDATE_WITH_LR_RULE_FUSION_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_v2.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_v2.cc index 43e18721630..59511a611ac 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_v2.cc @@ -14,12 +14,12 @@ * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_v2.h" #include #include #include #include "utils/utils.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_v2.h similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_v2.h index ea614d3d2d9..c5396178a5c 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_v2.h @@ -23,9 +23,9 @@ #include #include #include "ir/anf.h" -#include "pre_activate/common/pattern_engine.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/pattern_engine.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.cc similarity index 98% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.cc index b16387d8f1d..fa1e92120de 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h" #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h index 2655c0f14de..5bf16081433 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h @@ -17,9 +17,9 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_LAYER_NORM_BETA_GAMMA_BACKPROP_FUSION_H_ #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/matmul_biasadd_fusion.cc similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/matmul_biasadd_fusion.cc index e81c804b713..fdd390677af 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/matmul_biasadd_fusion.cc @@ -13,10 +13,10 @@ * See the License for the 
specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/matmul_biasadd_fusion.h" #include -#include "pre_activate/common/helper.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/matmul_biasadd_fusion.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/matmul_biasadd_fusion.h index 56675243de5..8c762435a95 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/matmul_biasadd_fusion.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_MATMUL_BIASADD_FUSION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_MATMUL_BIASADD_FUSION_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/momentum_lossscale_fusion.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/momentum_lossscale_fusion.cc index e7a73a9c7f2..90c5ac19a9e 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/momentum_lossscale_fusion.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the 
License. */ -#include "pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/momentum_lossscale_fusion.h" #include #include #include -#include "pre_activate/common/helper.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/momentum_lossscale_fusion.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/momentum_lossscale_fusion.h index c092e0ca22f..8d36684a118 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/momentum_lossscale_fusion.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_MOMENTUM_LOSSSCALE_FUSION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_MOMENTUM_LOSSSCALE_FUSION_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_add_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_add_fusion.cc similarity index 94% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_add_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_add_fusion.cc index 2536255fc1f..2d766891a06 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_add_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_add_fusion.cc @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fusion/mul_add_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/mul_add_fusion.h" #include #include #include #include #include -#include "session/anf_runtime_algorithm.h" -#include "optimizer/opt.h" -#include "pre_activate/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/optimizer/opt.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_add_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_add_fusion.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_add_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_add_fusion.h index 4b4db2b3124..0ad13e10e6e 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_add_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_add_fusion.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_MUL_ADD_FUSION_H #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_MUL_ADD_FUSION_H -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_addn_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_addn_fusion.cc similarity index 94% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_addn_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_addn_fusion.cc index a5e4675c8f9..3567864e2fe 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_addn_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_addn_fusion.cc @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fusion/mul_addn_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/mul_addn_fusion.h" #include #include #include #include #include -#include "session/anf_runtime_algorithm.h" -#include "optimizer/opt.h" -#include "pre_activate/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/optimizer/opt.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_addn_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_addn_fusion.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_addn_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_addn_fusion.h index d03309bf73c..484cb75237f 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_addn_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_addn_fusion.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_PASS_MUL_ADDN_FUSION_H #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_PASS_MUL_ADDN_FUSION_H -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/parameter_and_transop_fusion.cc similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/parameter_and_transop_fusion.cc index a3c87dad5d8..0c2667e4d9d 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/parameter_and_transop_fusion.cc @@ -14,15 +14,15 @@ * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/parameter_and_transop_fusion.h" #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" -#include "operator/ops.h" -#include "device/kernel_info.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "frontend/operator/ops.h" +#include "runtime/device/kernel_info.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { @@ -63,7 +63,7 @@ const AnfNodePtr ParamTransRoad(const FuncGraphPtr &func_graph, const AnfNodePtr kernel::KernelBuildInfoPtr GetKernelBuildInfo(const CNodePtr &cast, const string &format, TypeId input_type, TypeId output_type) { MS_EXCEPTION_IF_NULL(cast); - auto kernel_info = cast->kernel_info(); + auto kernel_info = dynamic_cast(cast->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto cast_build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(cast_build_info); diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/parameter_and_transop_fusion.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/parameter_and_transop_fusion.h index 823ec083b18..0479fd3d635 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/parameter_and_transop_fusion.h @@ -22,7 +22,7 @@ #include #include #include "ir/anf.h" -#include "pre_activate/common/pass.h" +#include "backend/optimizer/common/pass.h" namespace mindspore { namespace opt { 
diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/refresh_parameter_format.cc similarity index 86% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/refresh_parameter_format.cc index 857670a384c..ebaa429ebfe 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/refresh_parameter_format.cc @@ -14,14 +14,14 @@ * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/refresh_parameter_format.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/ascend/ir_fusion/refresh_parameter_format.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" -#include "operator/ops.h" -#include "device/kernel_info.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "frontend/operator/ops.h" +#include "runtime/device/kernel_info.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/refresh_parameter_format.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/refresh_parameter_format.h index 0ba688b134a..122bdf55cad 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/refresh_parameter_format.h @@ -21,7 +21,7 @@ #include #include #include "ir/anf.h" -#include "pre_activate/common/pass.h" 
+#include "backend/optimizer/common/pass.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/remove_reshape_pair.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/remove_reshape_pair.cc similarity index 92% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/remove_reshape_pair.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/remove_reshape_pair.cc index fa2815ff62e..6f48eabbc5b 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/remove_reshape_pair.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/remove_reshape_pair.cc @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/remove_reshape_pair.h" +#include "backend/optimizer/ascend/ir_fusion/remove_reshape_pair.h" #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/remove_reshape_pair.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/remove_reshape_pair.h similarity index 90% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/remove_reshape_pair.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/remove_reshape_pair.h index ddb25df70c6..848713201a0 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/remove_reshape_pair.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/remove_reshape_pair.h @@ -20,9 +20,9 @@ #include #include #include "ir/anf.h" -#include "pre_activate/common/pattern_engine.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/pattern_engine.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git 
a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/reshape_transpose_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/reshape_transpose_fusion.cc similarity index 93% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/reshape_transpose_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/reshape_transpose_fusion.cc index 9b130027987..02a866930c8 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/reshape_transpose_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/reshape_transpose_fusion.cc @@ -14,12 +14,12 @@ * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/reshape_transpose_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/reshape_transpose_fusion.h" #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" -#include "pre_activate/common/helper.h" -#include "operator/ops.h" +#include "backend/optimizer/common/helper.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/reshape_transpose_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/reshape_transpose_fusion.h similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/reshape_transpose_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/reshape_transpose_fusion.h index 5abf3e0d531..a76538019ec 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/reshape_transpose_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/reshape_transpose_fusion.h @@ -22,9 +22,9 @@ #include #include #include "ir/anf.h" -#include "pre_activate/common/pattern_engine.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/pattern_engine.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { 
namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/softmax_grad_ext_fusion.cc similarity index 94% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/softmax_grad_ext_fusion.cc index f95406e5e10..a3706bfb68d 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/softmax_grad_ext_fusion.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/softmax_grad_ext_fusion.h" #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "utils/utils.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/softmax_grad_ext_fusion.h similarity index 97% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/softmax_grad_ext_fusion.h index 59032e69733..1b884b27265 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/softmax_grad_ext_fusion.h @@ -18,7 +18,7 @@ #include #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/square_sum_fusion.cc 
b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/square_sum_fusion.cc similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/square_sum_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/square_sum_fusion.cc index 6261b63882f..67c881759a8 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/square_sum_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/square_sum_fusion.cc @@ -13,19 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/square_sum_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/square_sum_fusion.h" #include #include #include #include -#include "session/anf_runtime_algorithm.h" -#include "common/utils.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" -#include "operator/ops.h" -#include "pre_activate/common/helper.h" -#include "device/kernel_info.h" +#include "frontend/operator/ops.h" +#include "backend/optimizer/common/helper.h" +#include "runtime/device/kernel_info.h" namespace mindspore { namespace opt { @@ -51,7 +50,7 @@ CNodePtr GenerateSquareSumV1(const FuncGraphPtr &graph, const CNodePtr &square, square_sumv1->set_scope(sum->scope()); AnfAlgo::CopyNodeAttr(kAttrAxis, sum, square_sumv1); AnfAlgo::CopyNodeAttr(kAttrKeepDims, sum, square_sumv1); - auto names = MakeValue>({prim::kPrimSquare->name(), prim::kPrimReduceSum->name()}); + auto names = MakeValue>({square->fullname_with_scope(), sum->fullname_with_scope()}); AnfAlgo::SetNodeAttr(kAttrDatadumpOriginalNames, names, square_sumv1); return square_sumv1; } @@ -74,7 +73,7 @@ CNodePtr GenerateSquareSumV2(const FuncGraphPtr &graph, const CNodePtr &square, square_sumv2->set_scope(sum->scope()); AnfAlgo::CopyNodeAttr(kAttrAxis, sum, square_sumv2); AnfAlgo::CopyNodeAttr(kAttrKeepDims, sum, square_sumv2); - auto names = MakeValue>({prim::kPrimSquare->name(), prim::kPrimReduceSum->name()}); + 
auto names = MakeValue>({square->fullname_with_scope(), sum->fullname_with_scope()}); AnfAlgo::SetNodeAttr(kAttrDatadumpOriginalNames, names, square_sumv2); return square_sumv2; } diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/square_sum_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/square_sum_fusion.h similarity index 96% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/square_sum_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/square_sum_fusion.h index 5a694a55858..54189606ba4 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/square_sum_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/square_sum_fusion.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_SQUARE_SUM_FUSION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_SQUARE_SUM_FUSION_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_reshape_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_reshape_fusion.cc similarity index 93% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_reshape_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_reshape_fusion.cc index 250f86d9b1e..46bf2a86048 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_reshape_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_reshape_fusion.cc @@ -14,12 +14,12 @@ * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fusion/transpose_reshape_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/transpose_reshape_fusion.h" #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" -#include "pre_activate/common/helper.h" -#include "operator/ops.h" +#include "backend/optimizer/common/helper.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_reshape_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_reshape_fusion.h similarity index 91% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_reshape_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_reshape_fusion.h index 8b979f869d7..39b8fe4687d 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_reshape_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_reshape_fusion.h @@ -22,9 +22,9 @@ #include #include #include "ir/anf.h" -#include "pre_activate/common/pattern_engine.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/pattern_engine.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_transdata_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_transdata_fusion.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_transdata_fusion.cc rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_transdata_fusion.cc index e45fc2637fe..b6da588e895 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_transdata_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_transdata_fusion.cc @@ -14,11 +14,11 
@@ * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/transpose_transdata_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/transpose_transdata_fusion.h" #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_transdata_fusion.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_transdata_fusion.h similarity index 89% rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_transdata_fusion.h rename to mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_transdata_fusion.h index 833588cf455..852d5194ec7 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transpose_transdata_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_transdata_fusion.h @@ -22,10 +22,10 @@ #include #include #include "ir/anf.h" -#include "pre_activate/common/pattern_engine.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ascend_helper.h" +#include "backend/optimizer/common/pattern_engine.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/common/common_backend_optimization.cc b/mindspore/ccsrc/backend/optimizer/common/common_backend_optimization.cc similarity index 83% rename from mindspore/ccsrc/pre_activate/common/common_backend_optimization.cc rename to mindspore/ccsrc/backend/optimizer/common/common_backend_optimization.cc index b930ac69c9f..887b9a76a12 100644 --- a/mindspore/ccsrc/pre_activate/common/common_backend_optimization.cc +++ b/mindspore/ccsrc/backend/optimizer/common/common_backend_optimization.cc @@ 
-13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/common/common_backend_optimization.h" +#include "backend/optimizer/common/common_backend_optimization.h" #include #include -#include "pre_activate/common/optimizer.h" -#include "pre_activate/pass/convert_const_input_to_attr.h" -#include "pre_activate/pass/convert_tuple_output_to_maketuple.h" -#include "pre_activate/pass/convert_const_input_to_tensor_input.h" -#include "pre_activate/pass/convert_tuple_input_to_dynamic_input.h" -#include "pre_activate/pass/const_to_attr_strided_slice_grad.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/pass/convert_const_input_to_attr.h" +#include "backend/optimizer/pass/convert_tuple_output_to_maketuple.h" +#include "backend/optimizer/pass/convert_const_input_to_tensor_input.h" +#include "backend/optimizer/pass/convert_tuple_input_to_dynamic_input.h" +#include "backend/optimizer/pass/const_to_attr_strided_slice_grad.h" #include "utils/context/ms_context.h" #include "debug/anf_ir_dump.h" diff --git a/mindspore/ccsrc/pre_activate/common/common_backend_optimization.h b/mindspore/ccsrc/backend/optimizer/common/common_backend_optimization.h similarity index 96% rename from mindspore/ccsrc/pre_activate/common/common_backend_optimization.h rename to mindspore/ccsrc/backend/optimizer/common/common_backend_optimization.h index 6ce92da0dcd..4127fc05dec 100644 --- a/mindspore/ccsrc/pre_activate/common/common_backend_optimization.h +++ b/mindspore/ccsrc/backend/optimizer/common/common_backend_optimization.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_COMMON_COMMON_BACKEND_OPTIMIZATION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_COMMON_COMMON_BACKEND_OPTIMIZATION_H_ #include -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { void BackendCommonOptimization(const std::shared_ptr &kernel_graph); diff 
--git a/mindspore/ccsrc/pre_activate/common/fusion_id_allocator.cc b/mindspore/ccsrc/backend/optimizer/common/fusion_id_allocator.cc similarity index 93% rename from mindspore/ccsrc/pre_activate/common/fusion_id_allocator.cc rename to mindspore/ccsrc/backend/optimizer/common/fusion_id_allocator.cc index 2b45fc65795..d21cabe54a1 100644 --- a/mindspore/ccsrc/pre_activate/common/fusion_id_allocator.cc +++ b/mindspore/ccsrc/backend/optimizer/common/fusion_id_allocator.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/common/fusion_id_allocator.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/fusion_id_allocator.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/common/fusion_id_allocator.h b/mindspore/ccsrc/backend/optimizer/common/fusion_id_allocator.h similarity index 98% rename from mindspore/ccsrc/pre_activate/common/fusion_id_allocator.h rename to mindspore/ccsrc/backend/optimizer/common/fusion_id_allocator.h index 91e83600f29..bdee5ee84ae 100644 --- a/mindspore/ccsrc/pre_activate/common/fusion_id_allocator.h +++ b/mindspore/ccsrc/backend/optimizer/common/fusion_id_allocator.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_COMMON_FUSION_ID_ALLOCATOR_H_ #include -#include "ir/base.h" +#include "base/base.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/common/helper.cc b/mindspore/ccsrc/backend/optimizer/common/helper.cc similarity index 99% rename from mindspore/ccsrc/pre_activate/common/helper.cc rename to mindspore/ccsrc/backend/optimizer/common/helper.cc index e1db0ed6ed9..266130c6b15 100644 --- a/mindspore/ccsrc/pre_activate/common/helper.cc +++ b/mindspore/ccsrc/backend/optimizer/common/helper.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" #include #include #include @@ -24,10 +24,10 @@ #include #include "utils/utils.h" #include "utils/base_ref.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "common/utils.h" -#include "device/kernel_info.h" +#include "runtime/device/kernel_info.h" #include "utils/context/ms_context.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/common/helper.h b/mindspore/ccsrc/backend/optimizer/common/helper.h similarity index 98% rename from mindspore/ccsrc/pre_activate/common/helper.h rename to mindspore/ccsrc/backend/optimizer/common/helper.h index 49a1d47d0c3..a267e65b530 100644 --- a/mindspore/ccsrc/pre_activate/common/helper.h +++ b/mindspore/ccsrc/backend/optimizer/common/helper.h @@ -23,9 +23,9 @@ #include #include #include "ir/func_graph.h" -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" #include "common/utils.h" -#include "pre_activate/common/pattern_engine.h" +#include "backend/optimizer/common/pattern_engine.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/common/node_pass.cc b/mindspore/ccsrc/backend/optimizer/common/node_pass.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/common/node_pass.cc rename to mindspore/ccsrc/backend/optimizer/common/node_pass.cc index 876da8667ba..16f5284a570 100644 --- a/mindspore/ccsrc/pre_activate/common/node_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/common/node_pass.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/common/node_pass.h" +#include "backend/optimizer/common/node_pass.h" #include #include @@ -22,7 +22,7 @@ #include "ir/anf.h" #include "ir/func_graph.h" #include "ir/manager.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/common/node_pass.h b/mindspore/ccsrc/backend/optimizer/common/node_pass.h similarity index 94% rename from mindspore/ccsrc/pre_activate/common/node_pass.h rename to mindspore/ccsrc/backend/optimizer/common/node_pass.h index 7750a59e598..780ae1a0566 100644 --- a/mindspore/ccsrc/pre_activate/common/node_pass.h +++ b/mindspore/ccsrc/backend/optimizer/common/node_pass.h @@ -18,7 +18,7 @@ #include #include -#include "pre_activate/common/pass.h" +#include "backend/optimizer/common/pass.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/common/optimizer.cc b/mindspore/ccsrc/backend/optimizer/common/optimizer.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/common/optimizer.cc rename to mindspore/ccsrc/backend/optimizer/common/optimizer.cc index 71a523ea1d0..01e9111e867 100644 --- a/mindspore/ccsrc/pre_activate/common/optimizer.cc +++ b/mindspore/ccsrc/backend/optimizer/common/optimizer.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" #include #include @@ -23,8 +23,8 @@ #include #include -#include "pre_activate/common/pass_manager.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/pass_manager.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/manager.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/common/optimizer.h b/mindspore/ccsrc/backend/optimizer/common/optimizer.h similarity index 95% rename from mindspore/ccsrc/pre_activate/common/optimizer.h rename to mindspore/ccsrc/backend/optimizer/common/optimizer.h index 1f9961df6b8..0b03c9c0eeb 100644 --- a/mindspore/ccsrc/pre_activate/common/optimizer.h +++ b/mindspore/ccsrc/backend/optimizer/common/optimizer.h @@ -24,11 +24,11 @@ #include "ir/anf.h" #include "ir/func_graph.h" #include "ir/primitive.h" -#include "pre_activate/common/pass_manager.h" -#include "pre_activate/common/pattern_engine.h" +#include "backend/optimizer/common/pass_manager.h" +#include "backend/optimizer/common/pattern_engine.h" #include "utils/graph_utils.h" #include "common/utils.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/common/pass.h b/mindspore/ccsrc/backend/optimizer/common/pass.h similarity index 94% rename from mindspore/ccsrc/pre_activate/common/pass.h rename to mindspore/ccsrc/backend/optimizer/common/pass.h index 3d2468cddb0..6e35fb1dc4a 100644 --- a/mindspore/ccsrc/pre_activate/common/pass.h +++ b/mindspore/ccsrc/backend/optimizer/common/pass.h @@ -19,7 +19,7 @@ #include #include "ir/anf.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/common/pass_manager.cc b/mindspore/ccsrc/backend/optimizer/common/pass_manager.cc similarity index 95% rename from 
mindspore/ccsrc/pre_activate/common/pass_manager.cc rename to mindspore/ccsrc/backend/optimizer/common/pass_manager.cc index 3213b8a6d23..f9f41237e09 100644 --- a/mindspore/ccsrc/pre_activate/common/pass_manager.cc +++ b/mindspore/ccsrc/backend/optimizer/common/pass_manager.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/common/pass_manager.h" +#include "backend/optimizer/common/pass_manager.h" #include #include diff --git a/mindspore/ccsrc/pre_activate/common/pass_manager.h b/mindspore/ccsrc/backend/optimizer/common/pass_manager.h similarity index 93% rename from mindspore/ccsrc/pre_activate/common/pass_manager.h rename to mindspore/ccsrc/backend/optimizer/common/pass_manager.h index 38fe49b94c7..51db27d2509 100644 --- a/mindspore/ccsrc/pre_activate/common/pass_manager.h +++ b/mindspore/ccsrc/backend/optimizer/common/pass_manager.h @@ -21,8 +21,8 @@ #include #include -#include "pre_activate/common/pass.h" -#include "pre_activate/common/node_pass.h" +#include "backend/optimizer/common/pass.h" +#include "backend/optimizer/common/node_pass.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/common/pattern_engine.cc b/mindspore/ccsrc/backend/optimizer/common/pattern_engine.cc similarity index 99% rename from mindspore/ccsrc/pre_activate/common/pattern_engine.cc rename to mindspore/ccsrc/backend/optimizer/common/pattern_engine.cc index 42f966aa3d6..bd4efd82ef8 100644 --- a/mindspore/ccsrc/pre_activate/common/pattern_engine.cc +++ b/mindspore/ccsrc/backend/optimizer/common/pattern_engine.cc @@ -16,14 +16,14 @@ * limitations under the License. 
*/ -#include "pre_activate/common/pattern_engine.h" +#include "backend/optimizer/common/pattern_engine.h" #include #include #include #include -#include "optimizer/opt.h" +#include "frontend/optimizer/opt.h" #include "ir/anf.h" #include "utils/convert_utils_base.h" diff --git a/mindspore/ccsrc/pre_activate/common/pattern_engine.h b/mindspore/ccsrc/backend/optimizer/common/pattern_engine.h similarity index 99% rename from mindspore/ccsrc/pre_activate/common/pattern_engine.h rename to mindspore/ccsrc/backend/optimizer/common/pattern_engine.h index 858b1aecb88..51fa8801b2d 100644 --- a/mindspore/ccsrc/pre_activate/common/pattern_engine.h +++ b/mindspore/ccsrc/backend/optimizer/common/pattern_engine.h @@ -33,8 +33,8 @@ #include #include -#include "pre_activate/common/visit.h" -#include "ir/base.h" +#include "backend/optimizer/common/visit.h" +#include "base/base.h" #include "utils/log_adapter.h" #include "utils/base_ref.h" diff --git a/mindspore/ccsrc/pre_activate/common/visit.cc b/mindspore/ccsrc/backend/optimizer/common/visit.cc similarity index 98% rename from mindspore/ccsrc/pre_activate/common/visit.cc rename to mindspore/ccsrc/backend/optimizer/common/visit.cc index 179177dd67e..d0b52609f82 100644 --- a/mindspore/ccsrc/pre_activate/common/visit.cc +++ b/mindspore/ccsrc/backend/optimizer/common/visit.cc @@ -16,14 +16,14 @@ * limitations under the License. 
*/ -#include "pre_activate/common/visit.h" +#include "backend/optimizer/common/visit.h" #include #include #include #include -#include "pre_activate/common/pattern_engine.h" +#include "backend/optimizer/common/pattern_engine.h" #include "utils/any.h" #include "ir/anf.h" #include "ir/func_graph.h" diff --git a/mindspore/ccsrc/pre_activate/common/visit.h b/mindspore/ccsrc/backend/optimizer/common/visit.h similarity index 98% rename from mindspore/ccsrc/pre_activate/common/visit.h rename to mindspore/ccsrc/backend/optimizer/common/visit.h index 2017b03b2fe..9799d3f9c10 100644 --- a/mindspore/ccsrc/pre_activate/common/visit.h +++ b/mindspore/ccsrc/backend/optimizer/common/visit.h @@ -26,7 +26,7 @@ #include #include -#include "ir/base.h" +#include "base/base.h" #include "utils/base_ref.h" // namespace to support utils definition diff --git a/mindspore/ccsrc/pre_activate/gpu/adam_fusion.cc b/mindspore/ccsrc/backend/optimizer/gpu/adam_fusion.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/gpu/adam_fusion.cc rename to mindspore/ccsrc/backend/optimizer/gpu/adam_fusion.cc index 8111ee429d9..41e4abee27a 100644 --- a/mindspore/ccsrc/pre_activate/gpu/adam_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/gpu/adam_fusion.cc @@ -13,16 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/gpu/adam_fusion.h" +#include "backend/optimizer/gpu/adam_fusion.h" #include #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "utils/utils.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/gpu/adam_fusion.h b/mindspore/ccsrc/backend/optimizer/gpu/adam_fusion.h similarity index 97% rename from mindspore/ccsrc/pre_activate/gpu/adam_fusion.h rename to mindspore/ccsrc/backend/optimizer/gpu/adam_fusion.h index d8c10a0986a..f87defc04ca 100644 --- a/mindspore/ccsrc/pre_activate/gpu/adam_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/gpu/adam_fusion.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_GPU_IR_FUSION_ADAM_FUSION_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/gpu/adam_weight_decay_fusion.cc b/mindspore/ccsrc/backend/optimizer/gpu/adam_weight_decay_fusion.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/gpu/adam_weight_decay_fusion.cc rename to mindspore/ccsrc/backend/optimizer/gpu/adam_weight_decay_fusion.cc index c950cbd56fd..c95945c9806 100644 --- a/mindspore/ccsrc/pre_activate/gpu/adam_weight_decay_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/gpu/adam_weight_decay_fusion.cc @@ -13,16 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/gpu/adam_weight_decay_fusion.h" +#include "backend/optimizer/gpu/adam_weight_decay_fusion.h" #include #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "utils/utils.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/gpu/adam_weight_decay_fusion.h b/mindspore/ccsrc/backend/optimizer/gpu/adam_weight_decay_fusion.h similarity index 97% rename from mindspore/ccsrc/pre_activate/gpu/adam_weight_decay_fusion.h rename to mindspore/ccsrc/backend/optimizer/gpu/adam_weight_decay_fusion.h index 0ada5756e30..53477ec898c 100644 --- a/mindspore/ccsrc/pre_activate/gpu/adam_weight_decay_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/gpu/adam_weight_decay_fusion.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_GPU_IR_FUSION_ADAM_WEIGHT_DECAY_FUSION_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/kernel_refcount.cc b/mindspore/ccsrc/backend/optimizer/mem_reuse/kernel_refcount.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/mem_reuse/kernel_refcount.cc rename to mindspore/ccsrc/backend/optimizer/mem_reuse/kernel_refcount.cc index c75860a8df7..b531b0caa58 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/kernel_refcount.cc +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/kernel_refcount.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/mem_reuse/kernel_refcount.h" +#include "backend/optimizer/mem_reuse/kernel_refcount.h" #include #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/kernel_refcount.h b/mindspore/ccsrc/backend/optimizer/mem_reuse/kernel_refcount.h similarity index 100% rename from mindspore/ccsrc/pre_activate/mem_reuse/kernel_refcount.h rename to mindspore/ccsrc/backend/optimizer/mem_reuse/kernel_refcount.h diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_copy_manager.h b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_copy_manager.h similarity index 97% rename from mindspore/ccsrc/pre_activate/mem_reuse/mem_copy_manager.h rename to mindspore/ccsrc/backend/optimizer/mem_reuse/mem_copy_manager.h index ea9947b41b0..1952415515f 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_copy_manager.h +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_copy_manager.h @@ -22,8 +22,8 @@ #include #include #include -#include "session/kernel_graph.h" -#include "kernel/kernel.h" +#include "backend/session/kernel_graph.h" +#include "backend/kernel_compiler/kernel.h" using HostAddress = mindspore::kernel::Address; namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc rename to mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.cc index 095f8f6495a..8f705be556b 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "pre_activate/mem_reuse/mem_dynamic_allocator.h" +#include "backend/optimizer/mem_reuse/mem_dynamic_allocator.h" #include "common/utils.h" #include "utils/convert_utils.h" #include "utils/log_adapter.h" @@ -184,14 +184,16 @@ DynamicMemBlockPtr DynamicMemPoolBestFit::FindMemBlock(const DeviceMemPtr device if (iter != global_mem_block_list_.begin()) { return *(--iter); } - MS_LOG(ERROR) << "Can't find the mem_block of the device address[" << device_addr << "]."; return nullptr; } void DynamicMemPoolBestFit::FreeTensorMem(const DeviceMemPtr device_addr) { MS_EXCEPTION_IF_NULL(device_addr); auto mem_block = FindMemBlock(device_addr); - MS_EXCEPTION_IF_NULL(mem_block); + if (mem_block == nullptr) { + MS_LOG(WARNING) << "Can't find the mem_block of the device address[" << device_addr << "]."; + return; + } CombineMemBuf(mem_block, device_addr); } diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.h b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.h similarity index 100% rename from mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.h rename to mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.h diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.cc b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.cc rename to mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse.cc index d550b77bba2..263ceaec63b 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.cc +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse.cc @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#include "pre_activate/mem_reuse/mem_reuse.h" +#include "backend/optimizer/mem_reuse/mem_reuse.h" #include #include -#include "pre_activate/mem_reuse/mem_reuse_checker.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/mem_reuse/mem_reuse_checker.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace memreuse { @@ -329,22 +329,25 @@ void MemReuseUtil::SetSummaryNodesRefCount() { return; } + size_t total_summary_size = 0; for (auto &node_item : summary_nodes) { auto node = node_item.second.first; size_t index = IntToSize(node_item.second.second); - MS_LOG(INFO) << "set summary node's ref count, node: " << node->fullname_with_scope() << " index: " << index; if (kernel_output_refs_.find(node.get()) != kernel_output_refs_.end()) { KernelRefCountPtr kernel_ref = kernel_output_refs_[node.get()][index]; kernel_ref->ref_count_ = kMaxRefCount; kernel_ref->ref_count_dynamic_use_ = kMaxRefCount; + total_summary_size += kernel_ref->size_; + MS_LOG(INFO) << "Set summary node's ref count, node: " << node->fullname_with_scope() << " index: " << index; } else { - MS_LOG(WARNING) << "can't find summary node's kernel_def " << node->fullname_with_scope(); + MS_LOG(WARNING) << "Can't find summary node's kernel_def " << node->fullname_with_scope() << " index: " << index; } } #ifdef MEM_REUSE_DEBUG auto graph = *graph_; MemReuseChecker::GetInstance().CheckMemReuseIR(total_refs_list_, kernel_def_ptr_list_, &graph); #endif + MS_LOG(INFO) << "Special Tensor total size: SummaryNodes: " << total_summary_size; } void MemReuseUtil::SetGraphOutputRefCount() { diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.h b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse.h similarity index 95% rename from mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.h rename to mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse.h index 37281a7128b..b286bcbc2c6 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.h +++ 
b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse.h @@ -19,10 +19,10 @@ #include #include #include -#include "pre_activate/mem_reuse/kernel_refcount.h" -#include "session/anf_runtime_algorithm.h" -#include "session/kernel_graph.h" -#include "kernel/tbe/tbe_utils.h" +#include "backend/optimizer/mem_reuse/kernel_refcount.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/session/kernel_graph.h" +#include "backend/kernel_compiler/tbe/tbe_utils.h" using mindspore::kernel::tbe::TbeUtils; namespace mindspore { namespace memreuse { diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.cc b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_allocator.cc similarity index 84% rename from mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.cc rename to mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_allocator.cc index b36147f9bbc..d1a50a0dfe4 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.cc +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_allocator.cc @@ -13,10 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - -#include "pre_activate/mem_reuse/mem_reuse_allocator.h" -#include "pre_activate/mem_reuse/mem_reuse.h" -#include "pre_activate/mem_reuse/mem_reuse_checker.h" +#include "backend/optimizer/mem_reuse/mem_reuse_allocator.h" +#include "backend/optimizer/mem_reuse/mem_reuse.h" +#include "backend/optimizer/mem_reuse/mem_reuse_checker.h" +#ifdef ENABLE_D +#include "runtime/device/ascend/ascend_stream_assign.h" +#endif +#ifdef ENABLE_DEBUGGER +#include "debug/debugger/debugger.h" +#include "debug/debug_services.h" +#endif namespace mindspore { namespace memreuse { @@ -34,6 +40,9 @@ void BestFitMemReuse::InitMemReuseInfo(const MemReuseUtil *mem_reuse_util_ptr) { wk->size_ = AlignMemorySize(wk->size_); wk->ref_count_ = 1; } +#ifdef ENABLE_D + stream_groups_ = device::ascend::AscendStreamAssign::GetInstance().get_stream_group(); +#endif } void BestFitMemReuse::InitKernelDependence() { @@ -63,21 +72,67 @@ void BestFitMemReuse::InitKernelDependence() { } } -bool BestFitMemReuse::IsUsable(const KernelDefPtr &kernel_curr, const KernelDefPtr &kernel_prev) { +bool BestFitMemReuse::IsUsable(const KernelDefPtr &kernel_curr, const MembufPtr &mem_buf) { // determine whether the kernel_curr can reuse kernel_prev's output tensor membuf MS_EXCEPTION_IF_NULL(kernel_curr); + MS_EXCEPTION_IF_NULL(mem_buf); + auto kernel_prev = mem_buf->used_kernel_; MS_EXCEPTION_IF_NULL(kernel_prev); +#ifdef ENABLE_DEBUGGER + auto debugger_ = mindspore::Debugger::GetInstance(); + DebugServices *debug_services = debugger_->debug_services(); + auto watchpoint_table = debug_services->GetWatchpointTable(); + std::string current_kernel_name = kernel_curr->scope_full_name(); + if (debug_services->IsWatchPoint(current_kernel_name, watchpoint_table)) { + return false; + } +#endif auto curr_stream_id = kernel_curr->stream_id(); auto prev_stream_id = kernel_prev->stream_id(); if (curr_stream_id == prev_stream_id) { + mem_buf->type_ = IN_STREAM_REUSE; return true; } + + bool reuse_between_streams = true; + for 
(auto &stream_group : stream_groups_) { + size_t cur_index = UINT32_MAX; + size_t prev_index = UINT32_MAX; + for (size_t index = 0; index < stream_group.size(); index++) { + if (curr_stream_id == stream_group[index]) { + cur_index = index; + continue; + } + if (prev_stream_id == stream_group[index]) { + prev_index = index; + continue; + } + } + if ((prev_index != UINT32_MAX) && (cur_index == UINT32_MAX || (prev_index > cur_index))) { + // previous stream and current stream are not in the same group can't be reused + // previous stream is behind current stream can't be reused + reuse_between_streams = false; + break; + } + } + + if (reuse_between_streams) { + mem_buf->type_ = BETWEEN_STREAMS_REUSE; + return true; + } + auto iter = kernel_front_map_.find(kernel_curr); if (iter == kernel_front_map_.end()) { MS_LOG(EXCEPTION) << kernel_curr->scope_full_name() << " is not init."; } auto kernel_curr_front = iter->second; - return kernel_curr_front.count(kernel_prev); + auto depend_count = kernel_curr_front.count(kernel_prev); + if (depend_count) { + mem_buf->type_ = KERNEL_DEPENDENCE_REUSE; + return true; + } + + return false; } void BestFitMemReuse::AssignNodeOutputOffset() { @@ -135,7 +190,7 @@ std::map BestFitMemReuse::GetReusableMembufMap(size_t tensor_siz auto membuf = membuf_ptr_list_[i]; auto index = i; bool is_membuf_ok = membuf->status_ == kUnused && membuf->size_ >= tensor_size; - if (is_membuf_ok && IsUsable(current_kernel_, membuf->used_kernel_)) { + if (is_membuf_ok && IsUsable(current_kernel_, membuf)) { (void)size_map.insert(std::make_pair(membuf->size_, index)); break; } @@ -163,8 +218,8 @@ void BestFitMemReuse::SplitMembuf(const KernelRefCount *tensor_desc, size_t memb auto bias = membuf->size_ - tensor_desc->size_; membuf->size_ = tensor_desc->size_; // to check if spilt membuf can be merge - auto new_membuf = - std::make_shared(kUnused, bias, membuf->offset_ + membuf->size_, kInvalidIndex, current_kernel_); + auto new_membuf = std::make_shared(kUnused, 
bias, membuf->offset_ + membuf->size_, kInvalidIndex, + membuf->type_, current_kernel_); (void)membuf_ptr_list_.insert(membuf_ptr_list_.begin() + SizeToInt(membuf_index + 1), new_membuf); } @@ -176,7 +231,7 @@ void BestFitMemReuse::AddNewMembufPtr(KernelRefCount *tensor_desc, int flag) { } auto membuf_size = tensor_desc->size_; auto real_index = GetRealIndex(IntToSize(tensor_desc->index_), flag); - auto membuf = std::make_shared(kReused, membuf_size, membuf_offset, real_index, current_kernel_); + auto membuf = std::make_shared(kReused, membuf_size, membuf_offset, real_index, NEW, current_kernel_); membuf_ptr_list_.push_back(membuf); tensor_desc->offset_ = membuf_offset; } @@ -242,7 +297,7 @@ void BestFitMemReuse::ReleaseMembuf(size_t tensor_index, int flag) { auto membuf_next = (*next_iter); MS_EXCEPTION_IF_NULL(membuf_next); if (membuf_next->status_ == kUnused) { - bool is_merge = IsUsable(current_kernel_, membuf_next->used_kernel_); + bool is_merge = IsUsable(current_kernel_, membuf_next); if (is_merge) { membuf->size_ += membuf_next->size_; (void)membuf_ptr_list_.erase(next_iter); @@ -254,7 +309,7 @@ void BestFitMemReuse::ReleaseMembuf(size_t tensor_index, int flag) { auto membuf_prev = (*prev_iter); MS_EXCEPTION_IF_NULL(membuf_prev); if (membuf_prev->status_ == kUnused) { - bool is_merge = IsUsable(current_kernel_, membuf_prev->used_kernel_); + bool is_merge = IsUsable(current_kernel_, membuf_prev); if (is_merge) { membuf->size_ += membuf_prev->size_; membuf->offset_ = membuf_prev->offset_; diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.h b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_allocator.h similarity index 91% rename from mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.h rename to mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_allocator.h index 9aeda05dc37..ef1cfd3e111 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.h +++ 
b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_allocator.h @@ -30,8 +30,8 @@ #include #include #include -#include "pre_activate/mem_reuse/kernel_refcount.h" -#include "pre_activate/mem_reuse/mem_reuse.h" +#include "backend/optimizer/mem_reuse/kernel_refcount.h" +#include "backend/optimizer/mem_reuse/mem_reuse.h" namespace mindspore { namespace memreuse { @@ -40,11 +40,12 @@ static constexpr int kDynamicMem = -1; static constexpr int kWorkspaceMem = 1; static constexpr size_t kTotalSize = 0; enum Status { kUnused, kReused }; +enum MEMTYPE { NEW, IN_STREAM_REUSE, BETWEEN_STREAMS_REUSE, KERNEL_DEPENDENCE_REUSE }; class Membuf { public: Membuf() = default; - Membuf(Status status, size_t size, size_t offset, int index, const KernelDefPtr &used_kernel) - : status_(status), size_(size), offset_(offset), index_(index), used_kernel_(used_kernel) {} + Membuf(Status status, size_t size, size_t offset, int index, MEMTYPE type, const KernelDefPtr &used_kernel) + : status_(status), size_(size), offset_(offset), index_(index), type_(type), used_kernel_(used_kernel) {} ~Membuf() = default; // Memory block status flags Status status_ = kUnused; @@ -52,6 +53,7 @@ class Membuf { size_t offset_{0}; // Store the tensor index stored in this memory block at a certain moment int index_{0}; + MEMTYPE type_{NEW}; KernelDefPtr used_kernel_; }; using MembufPtr = std::shared_ptr; @@ -122,10 +124,10 @@ class BestFitMemReuse { /** * determine if the kernel_curr can reuse the output tensor add of kernel_prev * @param kernel_curr, current kernel - * @param kernel_prev, the membuf used by this kernel + * @param mem_buf, the membuf * @return bool */ - bool IsUsable(const KernelDefPtr &kernel_curr, const KernelDefPtr &kernel_prev); + bool IsUsable(const KernelDefPtr &kernel_curr, const MembufPtr &mem_buf); /** * init the dependence of all kernels in the graph */ @@ -150,6 +152,7 @@ class BestFitMemReuse { std::vector membuf_ptr_list_; // kernel_front_map_, key: the kernel_def, value: kernels 
before this kernel_def std::map> kernel_front_map_; + std::vector> stream_groups_; }; } // namespace memreuse } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_checker.cc b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_checker.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_checker.cc rename to mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_checker.cc index 5cd6a5f50ec..b93bf42f9f6 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_checker.cc +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_checker.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pre_activate/mem_reuse/mem_reuse_checker.h" +#include "backend/optimizer/mem_reuse/mem_reuse_checker.h" #include #include #include @@ -413,7 +413,8 @@ void MemReuseChecker::CheckNormalIR(const session::KernelGraph *graph) { void MemReuseChecker::SetMembuInfos(const KernelDef *op_def, const std::vector &membuf_ptr_list) { std::vector curr_mem_infos; for (const auto &mem : membuf_ptr_list) { - auto mem_checker = std::make_shared(mem->status_, mem->size_, mem->offset_, mem->index_, mem->used_kernel_); + auto mem_checker = + std::make_shared(mem->status_, mem->size_, mem->offset_, mem->index_, mem->type_, mem->used_kernel_); curr_mem_infos.push_back(mem_checker); } membuf_all_infos_.push_back(curr_mem_infos); @@ -427,7 +428,8 @@ void MemReuseChecker::SetAddNewMembuInfos(const KernelDef *op_def, const std::ve std::vector add_new_curr_mem; for (const auto &mem : membuf_ptr_list) { - auto mem_checker = std::make_shared(mem->status_, mem->size_, mem->offset_, mem->index_, mem->used_kernel_); + auto mem_checker = + std::make_shared(mem->status_, mem->size_, mem->offset_, mem->index_, mem->type_, mem->used_kernel_); add_new_curr_mem.push_back(mem_checker); } add_new_mem_infos_.push_back(add_new_curr_mem); @@ -451,6 +453,7 @@ void MemReuseChecker::ExportEachMembufInfo(std::ofstream &ofs) { << "mem_size\t" 
<< "mem_head\t" << "mem_tail\t" + << "mem_type\t" << "used_kernel\n"; size_t curr_used = 0; size_t curr_allocated = 0; @@ -461,8 +464,8 @@ void MemReuseChecker::ExportEachMembufInfo(std::ofstream &ofs) { << "streamID[@" << membuf->used_kernel_->stream_id() << "]" << "\t" << "#" << static_cast(membuf->status_) << "\t%" << membuf->index_ << "T" - << "\t" << membuf->size_ << "\t" << membuf->offset_ << "\t" << membuf->offset_ + membuf->size_ << "\t" - << GetSplitName(used_kernel) << "\n"; + << "\t" << membuf->size_ << "\t" << membuf->offset_ << "\t\t" << membuf->offset_ + membuf->size_ << "\t" + << "\t" << static_cast(membuf->type_) << "\t" << GetSplitName(used_kernel) << "\n"; if (membuf->status_ == kReused) { curr_used += membuf->size_; } diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_checker.h b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_checker.h similarity index 94% rename from mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_checker.h rename to mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_checker.h index 5fd3d0f5aec..3c4a00a3ca8 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_checker.h +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_checker.h @@ -22,11 +22,11 @@ #include #include #include -#include "mindspore/ccsrc/ir/anf.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/mem_reuse/mem_reuse.h" -#include "kernel/common_utils.h" -#include "pre_activate/mem_reuse/mem_reuse_allocator.h" +#include "mindspore/core/ir/anf.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/mem_reuse/mem_reuse.h" +#include "backend/kernel_compiler/common_utils.h" +#include "backend/optimizer/mem_reuse/mem_reuse_allocator.h" namespace mindspore { namespace memreuse { constexpr auto kSend = "Send"; diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.cc b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_swap_manager.cc similarity index 98% rename from 
mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.cc rename to mindspore/ccsrc/backend/optimizer/mem_reuse/mem_swap_manager.cc index 14073bfbc95..41bf5460c3c 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.cc +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_swap_manager.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "pre_activate/mem_reuse/mem_swap_manager.h" +#include "backend/optimizer/mem_reuse/mem_swap_manager.h" #include -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.h b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_swap_manager.h similarity index 98% rename from mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.h rename to mindspore/ccsrc/backend/optimizer/mem_reuse/mem_swap_manager.h index 1969dadb54c..d8620c85162 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.h +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_swap_manager.h @@ -23,7 +23,7 @@ #include #include #include -#include "pre_activate/mem_reuse/mem_copy_manager.h" +#include "backend/optimizer/mem_reuse/mem_copy_manager.h" using PerformPair = std::pair; namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/pass/add_atomic_clean.cc b/mindspore/ccsrc/backend/optimizer/pass/add_atomic_clean.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/add_atomic_clean.cc rename to mindspore/ccsrc/backend/optimizer/pass/add_atomic_clean.cc index 9df34a1c591..900dd0d5639 100644 --- a/mindspore/ccsrc/pre_activate/pass/add_atomic_clean.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/add_atomic_clean.cc @@ -14,16 +14,16 @@ * limitations under the License. 
*/ -#include "pre_activate/pass/add_atomic_clean.h" +#include "backend/optimizer/pass/add_atomic_clean.h" #include #include #include -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "utils/utils.h" #include "utils/graph_utils.h" #include "utils/log_adapter.h" -#include "session/anf_runtime_algorithm.h" -#include "session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/session/kernel_graph.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/mindspore/ccsrc/pre_activate/pass/add_atomic_clean.h b/mindspore/ccsrc/backend/optimizer/pass/add_atomic_clean.h similarity index 95% rename from mindspore/ccsrc/pre_activate/pass/add_atomic_clean.h rename to mindspore/ccsrc/backend/optimizer/pass/add_atomic_clean.h index bb1edb0e359..7e3fbdb472c 100644 --- a/mindspore/ccsrc/pre_activate/pass/add_atomic_clean.h +++ b/mindspore/ccsrc/backend/optimizer/pass/add_atomic_clean.h @@ -18,7 +18,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_ADD_ATOMIC_CLEAN_H_ #include -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/common_subexpression_elimination.cc b/mindspore/ccsrc/backend/optimizer/pass/common_subexpression_elimination.cc similarity index 88% rename from mindspore/ccsrc/pre_activate/pass/common_subexpression_elimination.cc rename to mindspore/ccsrc/backend/optimizer/pass/common_subexpression_elimination.cc index 297a167aa8e..133a7e764a7 100644 --- a/mindspore/ccsrc/pre_activate/pass/common_subexpression_elimination.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/common_subexpression_elimination.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/pass/common_subexpression_elimination.h" +#include "backend/optimizer/pass/common_subexpression_elimination.h" #include -#include "device/kernel_info.h" +#include "runtime/device/kernel_info.h" namespace mindspore { namespace opt { @@ -23,8 +23,8 @@ namespace { bool CheckEqualKernelBuildInfo(const AnfNodePtr &main, const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(main); MS_EXCEPTION_IF_NULL(node); - auto main_kernel_info = main->kernel_info(); - auto node_kernel_info = node->kernel_info(); + auto main_kernel_info = dynamic_cast(main->kernel_info()); + auto node_kernel_info = dynamic_cast(node->kernel_info()); if (main_kernel_info == nullptr && node_kernel_info == nullptr) { return true; } diff --git a/mindspore/ccsrc/pre_activate/pass/common_subexpression_elimination.h b/mindspore/ccsrc/backend/optimizer/pass/common_subexpression_elimination.h similarity index 94% rename from mindspore/ccsrc/pre_activate/pass/common_subexpression_elimination.h rename to mindspore/ccsrc/backend/optimizer/pass/common_subexpression_elimination.h index 18f433ab955..bac870e59f8 100644 --- a/mindspore/ccsrc/pre_activate/pass/common_subexpression_elimination.h +++ b/mindspore/ccsrc/backend/optimizer/pass/common_subexpression_elimination.h @@ -15,8 +15,8 @@ */ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_COMMON_SUBEXPRESSION_ELIMINATION_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_COMMON_SUBEXPRESSION_ELIMINATION_H_ -#include "pre_activate/common/pass.h" -#include "optimizer/cse.h" +#include "backend/optimizer/common/pass.h" +#include "frontend/optimizer/cse.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/communication_op_fusion.cc b/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/pass/communication_op_fusion.cc rename to mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.cc index aa4690abcbc..3ba055880cd 100644 --- 
a/mindspore/ccsrc/pre_activate/pass/communication_op_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.cc @@ -13,18 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/pass/communication_op_fusion.h" +#include "backend/optimizer/pass/communication_op_fusion.h" #include #include #include #include "utils/graph_utils.h" -#include "operator/ops.h" -#include "device/kernel_info.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/kernel_build_info.h" -#include "parallel/context.h" +#include "frontend/operator/ops.h" +#include "runtime/device/kernel_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "frontend/parallel/context.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/communication_op_fusion.h b/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.h similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/communication_op_fusion.h rename to mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.h index d00180f97f0..0e7cf9762db 100644 --- a/mindspore/ccsrc/pre_activate/pass/communication_op_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.h @@ -19,7 +19,7 @@ #include #include -#include "pre_activate/common/pass.h" +#include "backend/optimizer/common/pass.h" #include "ir/func_graph.h" #include "ir/anf.h" #include "utils/utils.h" diff --git a/mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc b/mindspore/ccsrc/backend/optimizer/pass/const_input_to_attr_registry.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc rename to mindspore/ccsrc/backend/optimizer/pass/const_input_to_attr_registry.cc index 6a557388adf..814ad9567c1 100644 --- a/mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc +++ 
b/mindspore/ccsrc/backend/optimizer/pass/const_input_to_attr_registry.cc @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/pass/const_input_to_attr_registry.h" +#include "backend/optimizer/pass/const_input_to_attr_registry.h" #include #include "utils/utils.h" #include "utils/log_adapter.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { @@ -72,6 +72,7 @@ ConstInputToAttrInfoRegistry::ConstInputToAttrInfoRegistry() { Register(kSpaceToBatchOpName, {1}); Register(kBatchToSpaceOpName, {1}); Register(kPadOpName, {1}); + Register(kPushOpName, {1}); } ConstInputToAttrInfoRegistry &ConstInputToAttrInfoRegistry::Instance() { diff --git a/mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.h b/mindspore/ccsrc/backend/optimizer/pass/const_input_to_attr_registry.h similarity index 100% rename from mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.h rename to mindspore/ccsrc/backend/optimizer/pass/const_input_to_attr_registry.h diff --git a/mindspore/ccsrc/pre_activate/pass/const_to_attr_strided_slice_grad.cc b/mindspore/ccsrc/backend/optimizer/pass/const_to_attr_strided_slice_grad.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/pass/const_to_attr_strided_slice_grad.cc rename to mindspore/ccsrc/backend/optimizer/pass/const_to_attr_strided_slice_grad.cc index b0e2ab044c7..51d399bbcd1 100644 --- a/mindspore/ccsrc/pre_activate/pass/const_to_attr_strided_slice_grad.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/const_to_attr_strided_slice_grad.cc @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/pass/const_to_attr_strided_slice_grad.h" +#include "backend/optimizer/pass/const_to_attr_strided_slice_grad.h" #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/primitive.h" #include "utils/context/ms_context.h" #include "utils/utils.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pre_activate/common/helper.h" +#include "abstract/abstract_value.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/const_to_attr_strided_slice_grad.h b/mindspore/ccsrc/backend/optimizer/pass/const_to_attr_strided_slice_grad.h similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/const_to_attr_strided_slice_grad.h rename to mindspore/ccsrc/backend/optimizer/pass/const_to_attr_strided_slice_grad.h index 2e364244bf7..83b44d5f51e 100644 --- a/mindspore/ccsrc/pre_activate/pass/const_to_attr_strided_slice_grad.h +++ b/mindspore/ccsrc/backend/optimizer/pass/const_to_attr_strided_slice_grad.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_CONST_TO_ATTR_STRIDED_SLICE_GRAD_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.cc b/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.cc new file mode 100644 index 00000000000..f2e35351b4e --- /dev/null +++ b/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.cc @@ -0,0 +1,58 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/optimizer/pass/convert_const_input_to_attr.h" + +#include +#include +#include +#include + +#include "backend/optimizer/pass/const_input_to_attr_registry.h" +#include "backend/optimizer/common/helper.h" +#include "utils/utils.h" +#include "utils/context/ms_context.h" +#include "frontend/operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/common_utils.h" + +namespace mindspore { +namespace opt { +const AnfNodePtr ConvertConstInputToAttr::Process(const FuncGraphPtr &, const AnfNodePtr &node, + const EquivPtr &) const { + if (node == nullptr || !AnfAlgo::IsRealCNodeKernel(node)) { + return nullptr; + } + std::vector todos; + if (AnfAlgo::IsGraphKernel(node)) { + auto sub_graph = AnfAlgo::GetCNodeFuncGraphPtr(node); + MS_EXCEPTION_IF_NULL(sub_graph); + kernel::GetValidKernelNodes(sub_graph, &todos); + } else { + todos.push_back(node); + } + + for (auto &t : todos) { + CNodePtr cnode = t->cast(); + ConstInputToAttrInfoRegister reg; + if (!ConstInputToAttrInfoRegistry::Instance().GetRegisterByOpName(AnfAlgo::GetCNodeName(cnode), ®)) { + continue; + } + ConstInputToAttr(cnode, reg.GetConstInputAttrInfo()); + } + return node; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.h b/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.h similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.h rename to 
mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.h index e124ff8cf4e..e6def42fa1f 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.h +++ b/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.h @@ -20,7 +20,7 @@ #include #include "ir/anf.h" -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_tensor_input.cc b/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_tensor_input.cc similarity index 94% rename from mindspore/ccsrc/pre_activate/pass/convert_const_input_to_tensor_input.cc rename to mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_tensor_input.cc index b4f98cc6d71..f204841f3c0 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_tensor_input.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_tensor_input.cc @@ -13,18 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/pass/convert_const_input_to_tensor_input.h" +#include "backend/optimizer/pass/convert_const_input_to_tensor_input.h" #include #include #include #include "utils/graph_utils.h" -#include "pre_activate/common/helper.h" -#include "session/anf_runtime_algorithm.h" -#include "session/kernel_graph.h" -#include "kernel/common_utils.h" -#include "device/kernel_info.h" +#include "backend/optimizer/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/session/kernel_graph.h" +#include "backend/kernel_compiler/common_utils.h" +#include "runtime/device/kernel_info.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_tensor_input.h b/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_tensor_input.h similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/convert_const_input_to_tensor_input.h rename to mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_tensor_input.h index 1cc2bdf0ecf..072652497ab 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_tensor_input.h +++ b/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_tensor_input.h @@ -18,7 +18,7 @@ #include #include "ir/anf.h" -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/convert_tuple_input_to_dynamic_input.cc b/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.cc similarity index 95% rename from mindspore/ccsrc/pre_activate/pass/convert_tuple_input_to_dynamic_input.cc rename to mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.cc index a03087c1a4a..b96a7af8f30 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_tuple_input_to_dynamic_input.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.cc @@ -13,16 +13,16 @@ * See the License 
for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/pass/convert_tuple_input_to_dynamic_input.h" +#include "backend/optimizer/pass/convert_tuple_input_to_dynamic_input.h" #include #include -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/helper.h" -#include "session/kernel_graph.h" -#include "kernel/common_utils.h" -#include "device/kernel_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" +#include "backend/session/kernel_graph.h" +#include "backend/kernel_compiler/common_utils.h" +#include "runtime/device/kernel_info.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/convert_tuple_input_to_dynamic_input.h b/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.h similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/convert_tuple_input_to_dynamic_input.h rename to mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.h index b3d8e25d6ef..63d2415dc52 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_tuple_input_to_dynamic_input.h +++ b/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.h @@ -20,7 +20,7 @@ #include #include "ir/anf.h" -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/convert_tuple_output_to_maketuple.cc b/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_output_to_maketuple.cc similarity index 93% rename from mindspore/ccsrc/pre_activate/pass/convert_tuple_output_to_maketuple.cc rename to mindspore/ccsrc/backend/optimizer/pass/convert_tuple_output_to_maketuple.cc index a5e51411bc2..34ba83ef170 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_tuple_output_to_maketuple.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_output_to_maketuple.cc @@ 
-13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pre_activate/pass/convert_tuple_output_to_maketuple.h" +#include "backend/optimizer/pass/convert_tuple_output_to_maketuple.h" #include #include -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/helper.h" -#include "session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/convert_tuple_output_to_maketuple.h b/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_output_to_maketuple.h similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/convert_tuple_output_to_maketuple.h rename to mindspore/ccsrc/backend/optimizer/pass/convert_tuple_output_to_maketuple.h index a16ffaf674c..9ff5ca91ed9 100644 --- a/mindspore/ccsrc/pre_activate/pass/convert_tuple_output_to_maketuple.h +++ b/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_output_to_maketuple.h @@ -20,7 +20,7 @@ #include #include "ir/anf.h" -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.cc b/mindspore/ccsrc/backend/optimizer/pass/eliminate_redundant_op.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.cc rename to mindspore/ccsrc/backend/optimizer/pass/eliminate_redundant_op.cc index 4d3dcfccc00..3ef912bcecd 100644 --- a/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/eliminate_redundant_op.cc @@ -14,16 +14,16 @@ * limitations under the License. 
*/ -#include "pre_activate/pass/eliminate_redundant_op.h" +#include "backend/optimizer/pass/eliminate_redundant_op.h" #include #include #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" -#include "pre_activate/common/helper.h" -#include "operator/ops.h" -#include "kernel/common_utils.h" +#include "backend/optimizer/common/helper.h" +#include "frontend/operator/ops.h" +#include "backend/kernel_compiler/common_utils.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.h b/mindspore/ccsrc/backend/optimizer/pass/eliminate_redundant_op.h similarity index 94% rename from mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.h rename to mindspore/ccsrc/backend/optimizer/pass/eliminate_redundant_op.h index c44190f6459..2fb4715cff1 100644 --- a/mindspore/ccsrc/pre_activate/pass/eliminate_redundant_op.h +++ b/mindspore/ccsrc/backend/optimizer/pass/eliminate_redundant_op.h @@ -22,8 +22,8 @@ #include #include #include "ir/anf.h" -#include "pre_activate/common/pattern_engine.h" -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/pattern_engine.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/erase_visit_attr.cc b/mindspore/ccsrc/backend/optimizer/pass/erase_visit_attr.cc similarity index 88% rename from mindspore/ccsrc/pre_activate/pass/erase_visit_attr.cc rename to mindspore/ccsrc/backend/optimizer/pass/erase_visit_attr.cc index 3b566b4f7cb..8c6cb4beb56 100644 --- a/mindspore/ccsrc/pre_activate/pass/erase_visit_attr.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/erase_visit_attr.cc @@ -14,12 +14,12 @@ * limitations under the License. 
*/ -#include "pre_activate/pass/erase_visit_attr.h" +#include "backend/optimizer/pass/erase_visit_attr.h" #include #include -#include "kernel/common_utils.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/helper.h" +#include "backend/kernel_compiler/common_utils.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/erase_visit_attr.h b/mindspore/ccsrc/backend/optimizer/pass/erase_visit_attr.h similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/erase_visit_attr.h rename to mindspore/ccsrc/backend/optimizer/pass/erase_visit_attr.h index a986aad83a1..37b88a4e390 100644 --- a/mindspore/ccsrc/pre_activate/pass/erase_visit_attr.h +++ b/mindspore/ccsrc/backend/optimizer/pass/erase_visit_attr.h @@ -18,7 +18,7 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_ERASE_VISIT_ATTR_H_ #include -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/fuse_basic.cc b/mindspore/ccsrc/backend/optimizer/pass/fuse_basic.cc similarity index 97% rename from mindspore/ccsrc/pre_activate/pass/fuse_basic.cc rename to mindspore/ccsrc/backend/optimizer/pass/fuse_basic.cc index 84edd5c5e2f..32655f1ec2d 100644 --- a/mindspore/ccsrc/pre_activate/pass/fuse_basic.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/fuse_basic.cc @@ -14,8 +14,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/pass/fuse_basic.h" -#include "pre_activate/pass/fuse_graph_kernel.h" +#include "backend/optimizer/pass/fuse_basic.h" +#include "backend/optimizer/pass/fuse_graph_kernel.h" #include #include @@ -24,11 +24,11 @@ #include #include -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "utils/utils.h" #include "utils/graph_utils.h" -#include "pre_activate/common/helper.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" #include "vm/segment_runner.h" #include "debug/draw.h" #include "debug/anf_ir_dump.h" diff --git a/mindspore/ccsrc/pre_activate/pass/fuse_basic.h b/mindspore/ccsrc/backend/optimizer/pass/fuse_basic.h similarity index 91% rename from mindspore/ccsrc/pre_activate/pass/fuse_basic.h rename to mindspore/ccsrc/backend/optimizer/pass/fuse_basic.h index fbbf5d99370..9b3916fe280 100644 --- a/mindspore/ccsrc/pre_activate/pass/fuse_basic.h +++ b/mindspore/ccsrc/backend/optimizer/pass/fuse_basic.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_FUSE_BASIC_H_ #include -#include "pre_activate/common/optimizer.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.cc b/mindspore/ccsrc/backend/optimizer/pass/fuse_graph_kernel.cc similarity index 99% rename from mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.cc rename to mindspore/ccsrc/backend/optimizer/pass/fuse_graph_kernel.cc index 0e287587a2c..e04110d8a02 100644 --- a/mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/fuse_graph_kernel.cc @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/pass/fuse_graph_kernel.h" +#include "backend/optimizer/pass/fuse_graph_kernel.h" #include #include @@ -25,11 +25,11 @@ #include #include -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "utils/utils.h" #include "utils/graph_utils.h" -#include "pre_activate/common/helper.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" #include "vm/segment_runner.h" #include "debug/draw.h" #include "debug/anf_ir_dump.h" diff --git a/mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.h b/mindspore/ccsrc/backend/optimizer/pass/fuse_graph_kernel.h similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.h rename to mindspore/ccsrc/backend/optimizer/pass/fuse_graph_kernel.h index a5a26765a3d..e14661dfdfd 100644 --- a/mindspore/ccsrc/pre_activate/pass/fuse_graph_kernel.h +++ b/mindspore/ccsrc/backend/optimizer/pass/fuse_graph_kernel.h @@ -21,8 +21,8 @@ #include #include #include -#include "pre_activate/common/optimizer.h" -#include "session/kernel_graph.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/getitem_tuple.cc b/mindspore/ccsrc/backend/optimizer/pass/getitem_tuple.cc similarity index 94% rename from mindspore/ccsrc/pre_activate/pass/getitem_tuple.cc rename to mindspore/ccsrc/backend/optimizer/pass/getitem_tuple.cc index af16017a7c0..a51a6bab421 100644 --- a/mindspore/ccsrc/pre_activate/pass/getitem_tuple.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/getitem_tuple.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "pre_activate/pass/getitem_tuple.h" +#include "backend/optimizer/pass/getitem_tuple.h" #include -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "utils/utils.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/getitem_tuple.h b/mindspore/ccsrc/backend/optimizer/pass/getitem_tuple.h similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/getitem_tuple.h rename to mindspore/ccsrc/backend/optimizer/pass/getitem_tuple.h index 0fc42a15dc9..9a25b924bd3 100644 --- a/mindspore/ccsrc/pre_activate/pass/getitem_tuple.h +++ b/mindspore/ccsrc/backend/optimizer/pass/getitem_tuple.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_GETITEM_TUPLE_SPLIT_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_GETITEM_TUPLE_SPLIT_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/optimize_dependence.cc b/mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.cc similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/optimize_dependence.cc rename to mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.cc index 1d5f909e7d2..710e130a85d 100644 --- a/mindspore/ccsrc/pre_activate/pass/optimize_dependence.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.cc @@ -14,15 +14,15 @@ * limitations under the License. 
*/ -#include "pre_activate/pass/optimize_dependence.h" +#include "backend/optimizer/pass/optimize_dependence.h" #include #include #include -#include "pre_activate/common/helper.h" -#include "operator/ops.h" +#include "backend/optimizer/common/helper.h" +#include "frontend/operator/ops.h" #include "utils/utils.h" -#include "session/kernel_graph.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/pre_activate/pass/optimize_dependence.h b/mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.h similarity index 96% rename from mindspore/ccsrc/pre_activate/pass/optimize_dependence.h rename to mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.h index 30027b790aa..8ddd4d662ea 100644 --- a/mindspore/ccsrc/pre_activate/pass/optimize_dependence.h +++ b/mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.h @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_OPTIMIZE_DEPENDENCE_H_ #define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_OPTIMIZE_DEPENDENCE_H_ -#include "pre_activate/common/optimizer.h" +#include "backend/optimizer/common/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/backend/optimizer/pass/replace_node_by_proxy.cc b/mindspore/ccsrc/backend/optimizer/pass/replace_node_by_proxy.cc new file mode 100644 index 00000000000..cd34464cda8 --- /dev/null +++ b/mindspore/ccsrc/backend/optimizer/pass/replace_node_by_proxy.cc @@ -0,0 +1,92 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/optimizer/pass/replace_node_by_proxy.h" +#include +#include +#include "runtime/device/kernel_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/kernel_build_info.h" + +namespace mindspore { +namespace opt { +kernel::KernelBuildInfoPtr ReplaceNodeByProxy::GenerateKernelBuildInfo(const CNodePtr &cnode) { + MS_EXCEPTION_IF_NULL(cnode); + std::vector inputs_device_format; + std::vector outputs_device_format; + std::vector inputs_device_type; + std::vector outputs_device_type; + std::vector> outputs_shape; + kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; + for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(cnode); ++input_index) { + inputs_device_format.push_back(AnfAlgo::GetInputFormat(cnode, input_index)); + inputs_device_type.push_back(AnfAlgo::GetInputDeviceDataType(cnode, input_index)); + } + for (size_t output_index = 0; output_index < AnfAlgo::GetOutputTensorNum(cnode); ++output_index) { + outputs_device_format.push_back(AnfAlgo::GetOutputFormat(cnode, output_index)); + outputs_device_type.push_back(AnfAlgo::GetOutputDeviceDataType(cnode, output_index)); + outputs_shape.push_back(AnfAlgo::GetOutputInferShape(cnode, output_index)); + } + builder.SetFusionType(AnfAlgo::GetFusionType(cnode)); + builder.SetProcessor(AnfAlgo::GetProcessor(cnode)); + builder.SetKernelType(AnfAlgo::GetKernelType(cnode)); + + builder.SetInputsFormat(inputs_device_format); + builder.SetOutputsFormat(outputs_device_format); + builder.SetInputsDeviceType(inputs_device_type); + 
builder.SetOutputsDeviceType(outputs_device_type); + return builder.Build(); +} + +bool ReplaceNodeByProxy::Run(const FuncGraphPtr &func_graph) { + MS_EXCEPTION_IF_NULL(func_graph); + auto manager = func_graph->manager(); + MS_EXCEPTION_IF_NULL(manager); + std::vector node_list = TopoSort(func_graph->get_return()); + for (auto node : node_list) { + if (node != nullptr && node->isa() && AnfAlgo::GetCNodeName(node) == kEmbeddingLookupOpName) { + CNodePtr cnode = node->cast(); + auto prim = std::make_shared(kEmbeddingLookupProxyOpName); + MS_EXCEPTION_IF_NULL(prim); + std::vector proxy_inputs = {NewValueNode(prim)}; + proxy_inputs.insert(proxy_inputs.end(), cnode->inputs().begin() + 1, cnode->inputs().end()); + AnfNodePtr proxy_node = func_graph->NewCNode(proxy_inputs); + MS_EXCEPTION_IF_NULL(proxy_node); + + auto kernel_info = std::make_shared(); + MS_EXCEPTION_IF_NULL(kernel_info); + proxy_node->set_kernel_info(kernel_info); + + AbstractBasePtrList abstract_list; + AnfAlgo::CopyNodeAttr(kAttrPsKey, cnode, proxy_node); + AnfAlgo::CopyNodeAttr("reduce_scatter_flag", cnode, proxy_node); + AnfAlgo::CopyNodeAttr("offset", cnode, proxy_node); + abstract_list.push_back(cnode->abstract()); + auto abstract_tuple = std::make_shared(abstract_list); + MS_EXCEPTION_IF_NULL(abstract_tuple); + proxy_node->set_abstract(abstract_tuple); + + auto kernel_build_info = GenerateKernelBuildInfo(cnode); + AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info, proxy_node.get()); + + if (!manager->Replace(cnode, proxy_node)) { + MS_LOG(EXCEPTION) << "Replace node by proxy node failed."; + } + } + } + return true; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/optimizer/pass/replace_node_by_proxy.h b/mindspore/ccsrc/backend/optimizer/pass/replace_node_by_proxy.h new file mode 100644 index 00000000000..382b08304ff --- /dev/null +++ b/mindspore/ccsrc/backend/optimizer/pass/replace_node_by_proxy.h @@ -0,0 +1,41 @@ +/** + * Copyright 2020 Huawei Technologies 
Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_REPLACE_NODE_BY_PROXY_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_REPLACE_NODE_BY_PROXY_H_ +#include +#include +#include + +#include "backend/optimizer/common/pass.h" +#include "ir/func_graph.h" +#include "ir/anf.h" +#include "utils/utils.h" +#include "backend/kernel_compiler/kernel_build_info.h" + +namespace mindspore { +namespace opt { +class ReplaceNodeByProxy : public Pass { + public: + explicit ReplaceNodeByProxy(const std::string &name) : Pass(name) {} + ~ReplaceNodeByProxy() override = default; + bool Run(const FuncGraphPtr &graph) override; + + private: + kernel::KernelBuildInfoPtr GenerateKernelBuildInfo(const CNodePtr &cnode); +}; +} // namespace opt +} // namespace mindspore +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_REPLACE_NODE_BY_PROXY_H_ diff --git a/mindspore/ccsrc/session/CMakeLists.txt b/mindspore/ccsrc/backend/session/CMakeLists.txt similarity index 89% rename from mindspore/ccsrc/session/CMakeLists.txt rename to mindspore/ccsrc/backend/session/CMakeLists.txt index 782eb511837..b7b791ada95 100644 --- a/mindspore/ccsrc/session/CMakeLists.txt +++ b/mindspore/ccsrc/backend/session/CMakeLists.txt @@ -29,4 +29,4 @@ if (ENABLE_D) endif () set_property(SOURCE ${_SESSION_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_SESSION) -add_library(_mindspore_session_obj OBJECT ${_SESSION_SRC_LIST}) 
+add_library(_mindspore_backend_session_obj OBJECT ${_SESSION_SRC_LIST}) diff --git a/mindspore/ccsrc/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc similarity index 87% rename from mindspore/ccsrc/session/anf_runtime_algorithm.cc rename to mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc index 81ad02e787a..38c040e6b15 100644 --- a/mindspore/ccsrc/session/anf_runtime_algorithm.cc +++ b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc @@ -13,20 +13,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include #include #include #include #include "ir/anf.h" #include "ir/func_graph.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "utils/utils.h" -#include "device/kernel_info.h" -#include "device/device_address.h" -#include "pre_activate/common/helper.h" -#include "kernel/kernel.h" -#include "kernel/kernel_build_info.h" +#include "runtime/device/kernel_info.h" +#include "runtime/device/device_address.h" +#include "backend/optimizer/common/helper.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/kernel_build_info.h" #include "common/utils.h" #include "common/trans.h" @@ -40,6 +40,9 @@ using kernel::KernelBuildInfoPtr; using kernel::KernelMod; using kernel::KernelModPtr; namespace { +constexpr size_t kNopNodeInputSize = 2; +constexpr size_t kNopNodeRealInputIndex = 1; + std::vector TransShapeToSizet(const abstract::ShapePtr &shape) { MS_EXCEPTION_IF_NULL(shape); std::vector shape_size_t; @@ -48,6 +51,26 @@ std::vector TransShapeToSizet(const abstract::ShapePtr &shape) { } } // namespace +AnfNodePtr AnfRuntimeAlgorithm::GetTupleGetItemRealInput(const CNodePtr &tuple_get_item) { + MS_EXCEPTION_IF_NULL(tuple_get_item); + if (tuple_get_item->size() != kTupleGetItemInputSize) { + MS_LOG(EXCEPTION) << "The 
node tuple_get_item must have 2 inputs!"; + } + return tuple_get_item->input(kRealInputNodeIndexInTupleGetItem); +} + +size_t AnfRuntimeAlgorithm::GetTupleGetItemOutIndex(const CNodePtr &tuple_get_item) { + MS_EXCEPTION_IF_NULL(tuple_get_item); + if (tuple_get_item->size() != kTupleGetItemInputSize) { + MS_LOG(EXCEPTION) << "The node tuple_get_item must have 2 inputs!"; + } + auto output_index_value_node = tuple_get_item->input(kInputNodeOutputIndexInTupleGetItem); + MS_EXCEPTION_IF_NULL(output_index_value_node); + auto value_node = output_index_value_node->cast(); + MS_EXCEPTION_IF_NULL(value_node); + return IntToSize(GetValue(value_node->value())); +} + KernelWithIndex AnfRuntimeAlgorithm::VisitKernel(const AnfNodePtr &anf_node, size_t index) { MS_EXCEPTION_IF_NULL(anf_node); if (anf_node->isa()) { @@ -83,49 +106,47 @@ KernelWithIndex AnfRuntimeAlgorithm::VisitKernel(const AnfNodePtr &anf_node, siz } } -KernelWithIndex AnfRuntimeAlgorithm::VisitKernelWithReturnType(const AnfNodePtr &anf_node, size_t index, +KernelWithIndex AnfRuntimeAlgorithm::VisitKernelWithReturnType(const AnfNodePtr &anf_node, int index, bool visit_nop_node, const std::vector &return_types) { MS_EXCEPTION_IF_NULL(anf_node); - for (const auto &prim_type : return_types) { - if (CheckPrimitiveType(anf_node, prim_type)) { - return std::make_pair(anf_node, index); - } + if (std::any_of(return_types.begin(), return_types.end(), [&anf_node](const PrimitivePtr &prim_type) -> bool { + return CheckPrimitiveType(anf_node, prim_type); + })) { + return KernelWithIndex(anf_node, index); } - if (anf_node->isa()) { - return std::make_pair(anf_node, 0); - } else if (anf_node->isa()) { - return std::make_pair(anf_node, 0); - } else if (anf_node->isa()) { - auto cnode = anf_node->cast(); - MS_EXCEPTION_IF_NULL(cnode); - auto input0 = cnode->input(0); - MS_EXCEPTION_IF_NULL(input0); - if (IsPrimitive(input0, prim::kPrimTupleGetItem)) { - if (cnode->inputs().size() != kTupleGetItemInputSize) { - MS_LOG(EXCEPTION) 
<< "The node tuple_get_item must have 2 inputs!"; - } - auto input2 = cnode->input(kInputNodeOutputIndexInTupleGetItem); - MS_EXCEPTION_IF_NULL(input2); - auto value_node = input2->cast(); - MS_EXCEPTION_IF_NULL(value_node); - int item_idx = GetValue(value_node->value()); - return VisitKernelWithReturnType(cnode->input(kRealInputNodeIndexInTupleGetItem), IntToSize(item_idx), - visit_nop_node, return_types); - } else if (IsPrimitive(input0, prim::kPrimDepend) || IsPrimitive(input0, prim::kPrimControlDepend)) { - return VisitKernelWithReturnType(cnode->input(kRealInputIndexInDepend), 0, visit_nop_node, return_types); - } else if (opt::IsNopNode(cnode) && visit_nop_node) { - if (cnode->inputs().size() == 2) { - return VisitKernelWithReturnType(cnode->input(1), 0, visit_nop_node, return_types); - } else { - MS_LOG(EXCEPTION) << cnode->DebugString() << "Invalid nop node"; - } - } else { - return std::make_pair(anf_node, index); - } - } else { - MS_LOG(EXCEPTION) << "The input is invalid"; + if (!anf_node->isa()) { + return KernelWithIndex(anf_node, 0); } + auto cnode = anf_node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (CheckPrimitiveType(cnode, prim::kPrimTupleGetItem)) { + auto item_with_index_tmp = VisitKernelWithReturnType(GetTupleGetItemRealInput(cnode), + GetTupleGetItemOutIndex(cnode), visit_nop_node, return_types); + if (CheckPrimitiveType(item_with_index_tmp.first, prim::kPrimMakeTuple)) { + MS_EXCEPTION_IF_NULL(item_with_index_tmp.first); + auto make_tuple = item_with_index_tmp.first->cast(); + MS_EXCEPTION_IF_NULL(make_tuple); + const std::vector &make_tuple_inputs = make_tuple->inputs(); + size_t make_tuple_input_index = item_with_index_tmp.second + 1; + if (make_tuple_input_index >= make_tuple_inputs.size()) { + MS_LOG(EXCEPTION) << "Index[" << make_tuple_input_index << "] out of range[" << make_tuple_inputs.size() + << "]."; + } + return VisitKernelWithReturnType(make_tuple_inputs[make_tuple_input_index], 0, visit_nop_node, return_types); + } + return 
item_with_index_tmp; + } + if (CheckPrimitiveType(cnode, prim::kPrimDepend) || CheckPrimitiveType(cnode, prim::kPrimControlDepend)) { + return VisitKernelWithReturnType(cnode->input(kRealInputIndexInDepend), index, visit_nop_node, return_types); + } + if (opt::IsNopNode(cnode) && visit_nop_node) { + if (cnode->size() != kNopNodeInputSize) { + MS_LOG(EXCEPTION) << "Invalid nop node " << cnode->DebugString(); + } + return VisitKernelWithReturnType(cnode->input(kNopNodeRealInputIndex), 0, visit_nop_node, return_types); + } + return KernelWithIndex(anf_node, index); } std::vector AnfRuntimeAlgorithm::GetAllOutput(const AnfNodePtr &node, @@ -338,7 +359,7 @@ std::string AnfRuntimeAlgorithm::GetOutputFormat(const AnfNodePtr &node, size_t if (!AnfAlgo::IsRealKernel(node)) { return AnfAlgo::GetPrevNodeOutputFormat(node, output_idx); } - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -360,7 +381,7 @@ std::string AnfRuntimeAlgorithm::GetInputFormat(const AnfNodePtr &node, size_t i if (!IsRealKernel(node)) { GetPrevNodeOutputFormat(node, input_idx); } - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -467,7 +488,7 @@ std::vector AnfRuntimeAlgorithm::GetInputReshapeType(const AnfNode if (!IsRealKernel(node)) { return GetPrevNodeOutputReshapeType(node, input_idx); } - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -486,7 +507,7 @@ std::vector AnfRuntimeAlgorithm::GetOutputReshapeType(const AnfNod if (!IsRealKernel(node)) { return 
GetPrevNodeOutputReshapeType(node, output_idx); } - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -546,7 +567,7 @@ TypeId AnfRuntimeAlgorithm::GetOutputDeviceDataType(const AnfNodePtr &node, size if (!IsRealKernel(node)) { return GetPrevNodeOutputDeviceDataType(node, output_idx); } - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -567,7 +588,7 @@ TypeId AnfRuntimeAlgorithm::GetInputDeviceDataType(const AnfNodePtr &node, size_ if (!IsRealKernel(node)) { return GetPrevNodeOutputDeviceDataType(node, 0); } - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -591,13 +612,13 @@ const DeviceAddress *AnfRuntimeAlgorithm::GetOutputAddr(const AnfNodePtr &node, if (opt::IsNopNode(node) && visit_nop_node) { auto cnode = node->cast(); MS_EXCEPTION_IF_NULL(cnode); - if (cnode->inputs().size() == 2) { + if (cnode->size() == kNopNodeInputSize) { return AnfRuntimeAlgorithm::GetPrevNodeOutputAddr(cnode, 0); } else { MS_LOG(EXCEPTION) << node->DebugString() << "Invalid nop node"; } } - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto addr = kernel_info->GetOutputAddr(output_idx); if (addr == nullptr) { @@ -613,13 +634,13 @@ DeviceAddressPtr AnfRuntimeAlgorithm::GetMutableOutputAddr(const AnfNodePtr &nod if (opt::IsNopNode(node) && visit_nop_node) { auto cnode = node->cast(); MS_EXCEPTION_IF_NULL(cnode); - if (cnode->inputs().size() == 2) { + if (cnode->inputs().size() == 
kNopNodeInputSize) { return AnfRuntimeAlgorithm::GetPrevNodeMutableOutputAddr(cnode, 0); } else { MS_LOG(EXCEPTION) << node->DebugString() << "Invalid nop node."; } } - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto addr = kernel_info->GetMutableOutputAddr(output_idx); if (addr == nullptr) { @@ -636,7 +657,7 @@ bool AnfRuntimeAlgorithm::OutputAddrExist(const AnfNodePtr &node, size_t output_ MS_LOG(EXCEPTION) << "The index [" << output_idx << "] is out of range of the node's output size [ " << GetOutputTensorNum(node) << "#node:[ " << node->DebugString() << "]"; } - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->OutputAddrExist(output_idx); } @@ -656,7 +677,7 @@ DeviceAddressPtr AnfRuntimeAlgorithm::GetPrevNodeMutableOutputAddr(const AnfNode // set output device addr of anf_node void AnfRuntimeAlgorithm::SetOutputAddr(const DeviceAddressPtr &addr, size_t output_idx, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); if (!kernel_info->SetOutputAddr(addr, output_idx)) { MS_LOG(EXCEPTION) << "Node " << node->DebugString() << "set adr" << output_idx << " fail"; @@ -666,7 +687,7 @@ void AnfRuntimeAlgorithm::SetOutputAddr(const DeviceAddressPtr &addr, size_t out // set workspace device addr of anf_node void AnfRuntimeAlgorithm::SetWorkspaceAddr(const DeviceAddressPtr &addr, size_t output_idx, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); if (!kernel_info->SetWorkspaceAddr(addr, output_idx)) { MS_LOG(EXCEPTION) << "Node " << node->DebugString() << "set adr" << output_idx << " fail"; @@ -676,7 +697,7 @@ void 
AnfRuntimeAlgorithm::SetWorkspaceAddr(const DeviceAddressPtr &addr, size_t // get workspace device addr of anf_node DeviceAddress *AnfRuntimeAlgorithm::GetWorkspaceAddr(const AnfNodePtr &node, size_t output_idx) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto addr = kernel_info->GetWorkspaceAddr(output_idx); if (addr == nullptr) { @@ -720,7 +741,7 @@ void AnfRuntimeAlgorithm::CopyAbstract(const AnfNodePtr &from_node, AnfNode *to_ kernel::OpPattern AnfRuntimeAlgorithm::GetOpPattern(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); // select_kernel_build_info() has checked whether return pointer is null auto build_info = kernel_info->select_kernel_build_info(); @@ -731,7 +752,7 @@ kernel::OpPattern AnfRuntimeAlgorithm::GetOpPattern(const AnfNodePtr &node) { // get KernelBuildType of node, such as ATT,RT,FWK and so on KernelType AnfRuntimeAlgorithm::GetKernelType(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); // select_kernel_build_info() has checked whether return pointer is null auto build_info = kernel_info->select_kernel_build_info(); @@ -741,7 +762,7 @@ KernelType AnfRuntimeAlgorithm::GetKernelType(const AnfNodePtr &node) { kernel::Processor AnfRuntimeAlgorithm::GetProcessor(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -750,7 +771,7 @@ kernel::Processor AnfRuntimeAlgorithm::GetProcessor(const AnfNodePtr &node) { kernel::FusionType 
AnfRuntimeAlgorithm::GetFusionType(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -760,7 +781,7 @@ kernel::FusionType AnfRuntimeAlgorithm::GetFusionType(const AnfNodePtr &node) { // set select kernel_build_info void AnfRuntimeAlgorithm::SetSelectKernelBuildInfo(const KernelBuildInfoPtr &select_kernel_build_info, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->set_select_kernel_build_info(select_kernel_build_info); } @@ -768,7 +789,7 @@ void AnfRuntimeAlgorithm::SetSelectKernelBuildInfo(const KernelBuildInfoPtr &sel // get select kernel_build_info KernelBuildInfoPtr AnfRuntimeAlgorithm::GetSelectKernelBuildInfo(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->GetMutableSelectKernelBuildInfo(); } @@ -776,7 +797,7 @@ KernelBuildInfoPtr AnfRuntimeAlgorithm::GetSelectKernelBuildInfo(const AnfNodePt // get kernelMode KernelMod *AnfRuntimeAlgorithm::GetKernelMod(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->MutableKernelMod(); } @@ -784,7 +805,7 @@ KernelMod *AnfRuntimeAlgorithm::GetKernelMod(const AnfNodePtr &node) { // set kernel mod void AnfRuntimeAlgorithm::SetKernelMod(const KernelModPtr &kernel_mod, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); 
kernel_info->set_kernel_mod(kernel_mod); } @@ -806,7 +827,7 @@ bool AnfRuntimeAlgorithm::IsRealKernel(const AnfNodePtr &node) { IsPrimitive(input, prim::kPrimHistogramSummary) || IsPrimitive(input, prim::kPrimMakeTuple) || IsPrimitive(input, prim::kPrimStateSetItem) || IsPrimitive(input, prim::kPrimDepend) || IsPrimitive(input, prim::kPrimTupleGetItem) || IsPrimitive(input, prim::kPrimControlDepend) || - IsPrimitive(input, prim::kPrimReturn); + IsPrimitive(input, prim::kPrimReturn) || IsPrimitive(input, prim::kPrimPartial); return !is_virtual_node; } @@ -850,42 +871,42 @@ bool AnfRuntimeAlgorithm::IsParameterWeight(const ParameterPtr &node) { void AnfRuntimeAlgorithm::SetStreamId(uint32_t stream_id, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); kernel_info->set_stream_id(stream_id); } uint32_t AnfRuntimeAlgorithm::GetStreamId(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->stream_id(); } void AnfRuntimeAlgorithm::SetStreamDistinctionLabel(uint32_t stream_label, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); kernel_info->set_stream_distinction_label(stream_label); } uint32_t AnfRuntimeAlgorithm::GetStreamDistinctionLabel(const AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->stream_distinction_label(); } void AnfRuntimeAlgorithm::SetGraphId(uint32_t graph_id, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); 
MS_EXCEPTION_IF_NULL(kernel_info); kernel_info->set_graph_id(graph_id); } uint32_t AnfRuntimeAlgorithm::GetGraphId(const AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->graph_id(); } @@ -913,7 +934,7 @@ bool AnfRuntimeAlgorithm::IsFeatureMapOutput(const AnfNodePtr &node) { if (node->isa()) { return false; } - auto kernel_info = node->kernel_info(); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->is_feature_map(); } @@ -1117,5 +1138,14 @@ TypeId AnfRuntimeAlgorithm::GetPrevNodeOutputPrecision(const AnfNodePtr &node, s } return GetCNodeOutputPrecision(kernel_with_index.first); } + +bool AnfRuntimeAlgorithm::IsCondControlKernel(const CNodePtr &node) { + MS_EXCEPTION_IF_NULL(node); + if (node->inputs().empty()) { + MS_LOG(EXCEPTION) << "Illegal null input of cnode."; + } + auto input = node->input(kAnfPrimitiveIndex); + return IsPrimitive(input, prim::kPrimLabelGoto) || IsPrimitive(input, prim::kPrimLabelSwitch); +} } // namespace session } // namespace mindspore diff --git a/mindspore/ccsrc/session/anf_runtime_algorithm.h b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h similarity index 94% rename from mindspore/ccsrc/session/anf_runtime_algorithm.h rename to mindspore/ccsrc/backend/session/anf_runtime_algorithm.h index 82056197935..4fa3150e367 100644 --- a/mindspore/ccsrc/session/anf_runtime_algorithm.h +++ b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h @@ -25,24 +25,29 @@ #include #include "ir/anf.h" #include "ir/dtype.h" -#include "ir/base.h" +#include "base/base.h" #include "ir/primitive.h" -#include "device/device_address.h" -#include "kernel/kernel.h" -#include "kernel/kernel_build_info.h" -#include "operator/ops.h" +#include "runtime/device/device_address.h" +#include "backend/kernel_compiler/kernel.h" +#include 
"backend/kernel_compiler/kernel_build_info.h" +#include "frontend/operator/ops.h" #include "utils/contract.h" -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace session { using AnfVisitFuncion = std::function; using KernelWithIndex = std::pair; +using DeviceAddress = device::DeviceAddress; +using DeviceAddressPtr = device::DeviceAddressPtr; class AnfRuntimeAlgorithm { public: + // get real input node of tuple_get_item + static AnfNodePtr GetTupleGetItemRealInput(const CNodePtr &tuple_get_item); + static size_t GetTupleGetItemOutIndex(const CNodePtr &tuple_get_item); // get input_anf_node's real kernel by recurse static KernelWithIndex VisitKernel(const AnfNodePtr &input_anf_node, size_t output_index); - static KernelWithIndex VisitKernelWithReturnType(const AnfNodePtr &input_anf_node, size_t output_index, + static KernelWithIndex VisitKernelWithReturnType(const AnfNodePtr &input_anf_node, int output_index, bool visit_nop_node = false, const std::vector &return_types = { prim::kPrimMakeTuple}); @@ -203,6 +208,7 @@ class AnfRuntimeAlgorithm { static TypeId GetCNodeOutputPrecision(const AnfNodePtr &node); // get fix output precision from prev node, input_idx is the input index of current node related to prev node. static TypeId GetPrevNodeOutputPrecision(const AnfNodePtr &node, size_t input_idx); + static bool IsCondControlKernel(const CNodePtr &node); }; } // namespace session using AnfAlgo = session::AnfRuntimeAlgorithm; diff --git a/mindspore/ccsrc/session/ascend_control_parser.cc b/mindspore/ccsrc/backend/session/ascend_control_parser.cc similarity index 54% rename from mindspore/ccsrc/session/ascend_control_parser.cc rename to mindspore/ccsrc/backend/session/ascend_control_parser.cc index 0c97116c6eb..274b355679a 100644 --- a/mindspore/ccsrc/session/ascend_control_parser.cc +++ b/mindspore/ccsrc/backend/session/ascend_control_parser.cc @@ -14,12 +14,13 @@ * limitations under the License. 
*/ -#include "session/ascend_control_parser.h" +#include "backend/session/ascend_control_parser.h" #include #include -#include "session/anf_runtime_algorithm.h" +#include +#include "backend/session/anf_runtime_algorithm.h" #include "utils/union_find_set.h" -#include "device/ascend/ascend_label_assign.h" +#include "runtime/device/ascend/ascend_label_assign.h" static constexpr size_t kCNodePrim = 0; static constexpr size_t kCNodeCallArg = 1; @@ -31,94 +32,11 @@ static constexpr size_t kCNodePartialLength = 2; static constexpr size_t kCNodePartialFunc = 1; static constexpr size_t kCNodeSwitchLayerBranch = 2; static constexpr size_t kCNodeSwitchLayerLength = 3; +static constexpr size_t kCNodeAssignTarget = 1; +static constexpr size_t kCNodeAssignSource = 2; namespace mindspore { namespace session { -static CNodePtr GetJumpNode(NotNull parent_graph, NotNull child_graph) { - auto &nodes = parent_graph->execution_order(); - CNodePtr last_jump_node = nullptr; - for (auto &node : nodes) { - if (IsPrimitiveCNode(node, prim::kPrimLabelGoto)) { - if (child_graph->get_start_label() == node->input(kCNodeCallArg)) { - return node; - } - last_jump_node = node; - } else if (IsPrimitiveCNode(node, prim::kPrimLabelSwitch)) { - if (child_graph->get_start_label() == node->input(kCNodeSwitchFalse) || - child_graph->get_start_label() == node->input(kCNodeSwitchTrue)) { - return node; - } - last_jump_node = node; - } - } - if (last_jump_node == nullptr) { - MS_LOG(EXCEPTION) << "Cannot find jump node from " << parent_graph->ToString() << " to " << child_graph->ToString(); - } - return last_jump_node; -} - -static void InitUnionFindSet(NotNull kg, const NotNull *> union_find_set, - const NotNull *> memo) { - if (memo->find(kg.get()) != memo->end()) { - return; - } - memo->insert(kg.get()); - - const std::vector>> &real_inputs = kg->real_inputs(); - for (auto &iter : real_inputs) { - auto ¶ = iter.first; - MS_EXCEPTION_IF_NULL(para); - if (para->isa()) { - union_find_set->Add(para); - } - 
for (auto &arg : iter.second) { - MS_EXCEPTION_IF_NULL(arg); - if (!arg->isa()) { - continue; - } - union_find_set->Add(arg); - } - } - for (auto &child : kg->child_graph_order()) { - InitUnionFindSet(NOT_NULL(child), union_find_set, memo); - } -} - -static void UnionParentParameter(NotNull kg, const NotNull *> union_find_set, - const NotNull *> memo) { - if (memo->find(kg.get()) != memo->end()) { - return; - } - memo->insert(kg.get()); - - const std::vector>> &real_inputs = kg->real_inputs(); - for (auto &iter : real_inputs) { - auto ¶ = iter.first; - for (auto &arg : iter.second) { - MS_EXCEPTION_IF_NULL(arg); - if (!arg->isa()) { - continue; - } - if (kg->unreuse_args().find(arg) != kg->unreuse_args().end()) { - continue; - } - union_find_set->Union(arg, para); - } - } - for (auto &child : kg->child_graph_order()) { - UnionParentParameter(NOT_NULL(child), union_find_set, memo); - } -} - -static UnionFindSet MakeUnionFindSet(NotNull root_kg) { - UnionFindSet result; - std::set memo; - InitUnionFindSet(root_kg, NOT_NULL(&result), NOT_NULL(&memo)); - memo.clear(); - UnionParentParameter(root_kg, NOT_NULL(&result), NOT_NULL(&memo)); - return result; -} - static void RecursiveReplaceNode(NotNull kg, NotNull main_parameter, const std::set ¶meter_reuse_set, const NotNull *> memo) { @@ -135,8 +53,9 @@ static void RecursiveReplaceNode(NotNull kg, NotNull continue; } MS_EXCEPTION_IF_NULL(para); - MS_LOG(INFO) << "Replace " << para->DebugString() << " of graph " << AnfAlgo::GetGraphId(para.get()) << " to " - << main_parameter->DebugString() << " of graph " << AnfAlgo::GetGraphId(main_parameter.get().get()); + MS_LOG(INFO) << "In " << kg->ToString() << " replace " << para->DebugString() << " of graph " + << AnfAlgo::GetGraphId(para.get()) << " to " << main_parameter->DebugString() << " of graph " + << AnfAlgo::GetGraphId(main_parameter.get().get()); kg->ReplaceNode(NOT_NULL(para), main_parameter); } @@ -145,7 +64,7 @@ static void RecursiveReplaceNode(NotNull kg, NotNull } } 
-static AnfNodePtr GetMainParameter(NotNull root_kg, const AnfNodePtr key, +static AnfNodePtr GetMainParameter(NotNull root_kg, const AnfNodePtr &key, const std::set ¶meter_reuse_set) { AnfNodePtr main_parameter = key; std::set root_inputs_set; @@ -160,8 +79,19 @@ static AnfNodePtr GetMainParameter(NotNull root_kg, const AnfNod return main_parameter; } -static void ReuseParameter(NotNull root_kg, NotNull *> parameter_set) { - auto parameter_reuse_sets = parameter_set->GetSets(); +static void ReuseParameter(NotNull root_kg, + const std::vector> &link_list) { + // make union find set + UnionFindSet union_find_set; + for (auto &[param, arg] : link_list) { + union_find_set.Add(param); + union_find_set.Add(arg); + } + for (auto &[param, arg] : link_list) { + union_find_set.Union(param, arg); + } + auto parameter_reuse_sets = union_find_set.GetSets(); + for (auto &[key, parameter_reuse_set] : parameter_reuse_sets) { if (parameter_reuse_set.size() <= 1) { continue; @@ -172,7 +102,7 @@ static void ReuseParameter(NotNull root_kg, NotNull &list, size_t start) { +static CNodePtr GetNextRealKernel(const std::vector &list, size_t start) { for (size_t i = start; i < list.size() - 1; ++i) { if (!IsPrimitiveCNode(list[i], prim::kPrimPartial) && AnfAlgo::IsRealKernel(list[i])) { return list[i]; @@ -181,71 +111,287 @@ CNodePtr GetNextRealKernel(const std::vector &list, size_t start) { return nullptr; } -void AscendControlParser::LinkGraph(NotNull kg) { - std::set memo; - (void)ProcessKernelGraph(kg, nullptr, nullptr, NOT_NULL(&memo)); - device::ascend::AscendLabelAssign::GetInstance().AssignLabel(kg); - std::map graph_id_map; - for (auto &g : memo) { - MS_EXCEPTION_IF_NULL(g); - if (graph_id_map.find(g->graph_id()) != graph_id_map.end()) { - MS_LOG(EXCEPTION) << "Two graph has same graph id " << g->graph_id() - << ", graph: " << graph_id_map[g->graph_id()]->ToString() << " " << g->ToString(); +static void UpdateLabelIdToLabelSetMap(const std::vector &exec_order, + const NotNull *> 
label_id_to_label_set) { + for (auto &node : exec_order) { + MS_EXCEPTION_IF_NULL(node); + if (!IsPrimitiveCNode(node, prim::kPrimLabelSet)) { + continue; } - graph_id_map[g->graph_id()] = g; + if (!AnfAlgo::HasNodeAttr(kAttrLabelIndex, node)) { + MS_LOG(EXCEPTION) << node->DebugString() << " has no attr kAttrLabelIndex"; + } + uint32_t label_id = AnfAlgo::GetNodeAttr(node, kAttrLabelIndex); + if (auto iter = label_id_to_label_set->find(label_id); iter != label_id_to_label_set->end()) { + MS_LOG(EXCEPTION) << "There are more than one node has same label id " << label_id + << ", node: " << iter->second->DebugString() << " and " << node->DebugString(); + } + (*label_id_to_label_set)[label_id] = node; + } +} + +static std::vector GetTargetLabelSetNodes(NotNull jump_node, + const std::map &label_id_to_label_set) { + std::vector target_label_list; + std::vector target_labelset_nodes; + if (IsPrimitiveCNode(jump_node.get(), prim::kPrimLabelGoto)) { + if (!AnfAlgo::HasNodeAttr(kAttrLabelIndex, jump_node)) { + MS_LOG(EXCEPTION) << jump_node->DebugString() << " has no attr kAttrLabelIndex"; + } + uint32_t label_id = AnfAlgo::GetNodeAttr(jump_node.get(), kAttrLabelIndex); + target_label_list.push_back(label_id); + } else if (IsPrimitiveCNode(jump_node.get(), prim::kPrimLabelSwitch)) { + if (!AnfAlgo::HasNodeAttr(kAttrLabelSwitchList, jump_node)) { + MS_LOG(EXCEPTION) << jump_node->DebugString() << " has no attr kPrimLabelSwitch"; + } + target_label_list = AnfAlgo::GetNodeAttr>(jump_node.get(), kAttrLabelSwitchList); + } else { + MS_LOG(EXCEPTION) << "Unknown type jump node " << jump_node->DebugString(); } + for (auto label_id : target_label_list) { + auto iter = label_id_to_label_set.find(label_id); + if (iter == label_id_to_label_set.end()) { + MS_LOG(EXCEPTION) << "Connot find LabelSet node has label id " << label_id; + } + target_labelset_nodes.push_back(iter->second); + } + return target_labelset_nodes; +} + +static void EraseNodeFromExecOrder(const AnfNodePtr &node, 
const NotNull *> exec_order) { + MS_EXCEPTION_IF_NULL(node); + auto exec_iter = std::find(exec_order->begin(), exec_order->end(), node); + if (exec_iter == exec_order->end()) { + MS_LOG(EXCEPTION) << "Cannot find " << node->DebugString() << " in exec order."; + } + exec_order->erase(exec_iter); +} + +void AscendControlParser::LinkGraph(NotNull kg) { + std::set memo; + std::vector> link_list; // Insert Assign - ChildGraphDataAssign(graph_id_map); - // Make UnionFindSet - UnionFindSet parameter_set = MakeUnionFindSet(kg); + ChildGraphDataAssign(kg, NOT_NULL(&link_list), NOT_NULL(&memo)); // Reuse Parameter - ReuseParameter(kg, NOT_NULL(¶meter_set)); + ReuseParameter(kg, link_list); + // replace call by label goto / label switch + memo.clear(); + (void)ProcessKernelGraph(kg, nullptr, nullptr, NOT_NULL(&memo)); + // assign label resource + device::ascend::AscendLabelAssign::GetInstance().AssignLabel(kg); +} + +void AscendControlParser::EraseParameter(NotNull root_graph, + const std::set &graph_list) { + std::vector exec_order = root_graph->execution_order(); + std::set search_list(exec_order.begin(), exec_order.end()); + std::set root_inputs(root_graph->inputs().begin(), root_graph->inputs().end()); + auto ref_map = root_graph->GetRefMap(); + ReferenceCounter parameter_count([](int32_t read, int32_t write) -> bool { return write == 1; }); + std::multimap> ref_multimap; + std::transform(ref_map.begin(), ref_map.end(), std::inserter(ref_multimap, ref_multimap.end()), + [](const std::pair, std::pair> &p) + -> std::pair> { + return {p.first.first, {p.first.second, p.second.first, p.second.second}}; + }); + std::set all_nodes; + std::map para_to_written_node; + for (auto &graph : graph_list) { + auto out = graph->get_return(); + MS_EXCEPTION_IF_NULL(out); + search_list.insert(out->cast()); + auto nodes = TopoSort(out); + for (auto &node : nodes) { + MS_EXCEPTION_IF_NULL(node); + auto cnode = node->cast(); + if (cnode != nullptr) { + all_nodes.insert(cnode); + } + } + } + // 
prepare referance count + for (auto &node : search_list) { + MS_EXCEPTION_IF_NULL(node); + // if assign node + std::set refed_parameters; + for (auto [iter, end] = ref_multimap.equal_range(node); iter != end; ++iter) { + refed_parameters.insert(std::get<1>(iter->second)); + } + + for (auto &in : node->inputs()) { + auto visit_node = AnfAlgo::VisitKernelWithReturnType(in, 0).first; + if (!visit_node->isa() || root_inputs.find(visit_node) != root_inputs.end()) { + continue; + } + if (refed_parameters.find(visit_node) != refed_parameters.end()) { + parameter_count.AddWriteCount(visit_node, 1); + para_to_written_node[visit_node] = node; + } else { + parameter_count.AddReadCount(visit_node, 1); + } + } + } + + while (parameter_count.HasValidElem()) { + auto [para, read, written] = parameter_count.GetOneValidElem(); + MS_LOG(INFO) << para->DebugString() << " was read " << read << " times, written " << written << " times."; + auto assign_iter = para_to_written_node.find(para); + if (assign_iter == para_to_written_node.end()) { + MS_LOG(EXCEPTION) << "Cannot find assign node that write " << para->DebugString(); + } + auto &assign_node = assign_iter->second; + MS_EXCEPTION_IF_NULL(assign_node); + if (!IsPrimitiveCNode(assign_node, prim::kPrimAssign)) { + parameter_count.EraseElem(para); + continue; + } + MS_LOG(INFO) << "Erase " << assign_node->DebugString(5); + EraseNodeFromExecOrder(assign_node, NOT_NULL(&exec_order)); + + auto source = AnfAlgo::VisitKernelWithReturnType(assign_node->input(kCNodeAssignSource), 0).first; + parameter_count.AddReadCount(source, -1); + parameter_count.AddWriteCount(para, -1); + for (auto &node : all_nodes) { + for (size_t i = 0; i < node->size(); ++i) { + if (node->input(i) == para) { + MS_LOG_INFO << "Replace " << node->DebugString() << " input " << i << " by " << source->DebugString(); + node->set_input(i, source); + } + } + } + parameter_count.AddReadCount(source, 1); + parameter_count.AddReadCount(para, -1); + } + 
root_graph->set_execution_order(exec_order); +} + +void AscendControlParser::EraseLabel(NotNull root_graph) { + std::vector exec_order = root_graph->execution_order(); + ReferenceCounter label_count([](int32_t read, int32_t write) -> bool { return read <= 1; }); + std::map label_to_written_node; + std::map label_id_to_label_set; + UpdateLabelIdToLabelSetMap(exec_order, NOT_NULL(&label_id_to_label_set)); + CNodePtr last_node = nullptr; + for (auto &cur_node : exec_order) { + MS_EXCEPTION_IF_NULL(cur_node); + if (AnfAlgo::IsCondControlKernel(cur_node)) { + std::vector target_labelset_nodes = GetTargetLabelSetNodes(NOT_NULL(cur_node), label_id_to_label_set); + for (auto &label_set : target_labelset_nodes) { + label_count.AddReadCount(label_set, 1); + label_to_written_node[label_set] = cur_node; + } + } else if (IsPrimitiveCNode(cur_node, prim::kPrimLabelSet)) { + label_count.AddWriteCount(cur_node, 1); + if (last_node != nullptr && !AnfAlgo::IsCondControlKernel(last_node)) { + label_count.AddReadCount(cur_node, 1); + label_to_written_node[cur_node] = last_node; + } + } + last_node = cur_node; + } + + while (label_count.HasValidElem()) { + auto [label_set, read, written] = label_count.GetOneValidElem(); + MS_LOG(INFO) << label_set->DebugString() << " was read " << read << " times, written " << written << " times."; + auto iter = label_to_written_node.find(label_set); + if (read > 0 && iter == label_to_written_node.end()) { + MS_LOG(EXCEPTION) << "Cannot find node jump to " << label_set->DebugString(); + } + CNodePtr jump_node = read > 0 ? 
iter->second : nullptr; + if (jump_node == nullptr || IsPrimitiveCNode(jump_node, prim::kPrimLabelGoto)) { + MS_LOG(INFO) << "Erase node " << label_set->DebugString(); + EraseNodeFromExecOrder(label_set, NOT_NULL(&exec_order)); + } + if (jump_node != nullptr && IsPrimitiveCNode(jump_node, prim::kPrimLabelGoto)) { + MS_LOG(INFO) << "Erase node " << jump_node->DebugString(); + EraseNodeFromExecOrder(jump_node, NOT_NULL(&exec_order)); + } + label_count.EraseElem(label_set); + } + + root_graph->set_execution_order(exec_order); } void AscendControlParser::ExecutorValidate(NotNull root_graph) { std::set memo; (void)RecurseGraph(root_graph, NOT_NULL(&memo)); + EraseParameter(root_graph, memo); + EraseLabel(root_graph); } -void AscendControlParser::ChildGraphDataAssign(const std::map &graph_id_map) { - for (auto &iter : graph_id_map) { - auto &kg = iter.second; - MS_LOG(INFO) << "Data assign graph:" << kg->graph_id(); - MS_EXCEPTION_IF_NULL(kg); - std::set> memo; - const std::vector>> &real_inputs = kg->real_inputs(); - for (auto &it : real_inputs) { - auto ¶meter = it.first; - auto &args = it.second; - for (auto &arg : args) { - MS_EXCEPTION_IF_NULL(arg); - if (memo.find({parameter, arg}) != memo.end()) { - continue; - } else { - memo.emplace(parameter, arg); - } - auto unreuse_args_map = kg->unreuse_args(); - auto unreuse_arg_iter = unreuse_args_map.find(arg); - if (unreuse_arg_iter == unreuse_args_map.end()) { - MS_EXCEPTION_IF_NULL(arg); - MS_EXCEPTION_IF_NULL(parameter); - if (!arg->isa()) { - MS_LOG(EXCEPTION) << "Reused arg must be parameter, arg:" << arg->DebugString() << "."; - } - MS_LOG(DEBUG) << "Parameter should be reused, no need insert assign, parameter: " << parameter->DebugString() - << ", arg:" << arg->DebugString(); +std::vector>> AscendControlParser::ParseCallNode( + NotNull call_node) { + std::vector>> ret; + if (!IsPrimitiveCNode(call_node.get(), prim::kPrimCall)) { + MS_LOG(EXCEPTION) << "Node " << call_node->DebugString() << " is not a call node."; 
+ } + if (call_node->size() <= kCNodeCallArg) { + MS_LOG(EXCEPTION) << "Node " << call_node->DebugString() << " has invalid inputs size " << call_node->size(); + } + const std::vector &call_node_inputs = call_node->inputs(); + auto call_arg = call_node_inputs[kCNodeCallArg]; + MS_EXCEPTION_IF_NULL(call_arg); + if (IsValueNode(call_arg)) { + ret.emplace_back(GetValueNode(call_arg), + std::vector(call_node_inputs.begin() + kCNodeCallArg + 1, call_node_inputs.end())); + } else if (IsPrimitiveCNode(call_arg, prim::kPrimSwitch)) { + auto switch_cnode = call_arg->cast(); + MS_EXCEPTION_IF_NULL(switch_cnode); + const std::vector &switch_inputs = switch_cnode->inputs(); + if (switch_inputs.size() <= kCNodeSwitchCond) { + MS_LOG(EXCEPTION) << "Node " << switch_cnode->DebugString() << " has invalid inputs size " + << switch_inputs.size(); + } + for (auto iter = switch_inputs.begin() + kCNodeSwitchCond + 1; iter != switch_inputs.end(); ++iter) { + const auto &[target_graph, args] = ParsePartial(NOT_NULL(*iter)); + ret.emplace_back(target_graph, args); + } + } else { + MS_LOG(EXCEPTION) << "Unsupport call node: " << call_node->DebugString(5); + } + return ret; +} + +void AscendControlParser::ChildGraphDataAssign( + NotNull kg, const NotNull> *> link_list, + const NotNull *> memo) { + if (memo->find(kg) != memo->end()) { + return; + } + memo->insert(kg.get()); + + MS_LOG(INFO) << "Start link data for " << kg->ToString(); + const std::vector &nodes = kg->execution_order(); + + for (auto &node : nodes) { + if (!IsPrimitiveCNode(node, prim::kPrimCall)) { + continue; + } + + auto child_graph_list = ParseCallNode(NOT_NULL(node)); + for (auto &[child_graph, args] : child_graph_list) { + MS_EXCEPTION_IF_NULL(child_graph); + const std::vector ¶ms = child_graph->inputs(); + if (args.size() != params.size()) { + MS_LOG(EXCEPTION) << child_graph->ToString() << " needs " << params.size() << " inputs but call node " + << node->DebugString(5) << " gives " << args.size(); + } + for (size_t i 
= 0; i < args.size(); ++i) { + if (args[i]->isa() && memo->find(child_graph) == memo->end()) { + MS_LOG(INFO) << args[i]->DebugString() << " to " << params[i]->DebugString() + << " should be reused, continue."; + link_list->emplace_back(args[i], params[i]); continue; } - auto target_graph_iter = graph_id_map.find(AnfAlgo::GetGraphId(arg.get())); - if (target_graph_iter == graph_id_map.end()) { - MS_LOG(EXCEPTION) << "Graph id " << AnfAlgo::GetGraphId(arg.get()) << " not found."; - } - InsertMultipleAssignToGraph(NOT_NULL(target_graph_iter->second), NOT_NULL(kg), NOT_NULL(arg), - NOT_NULL(parameter)); + + InsertMultipleAssignToGraph(kg, node, NOT_NULL(args[i]), NOT_NULL(params[i])); } } - kg->SetExecOrderByDefault(); + } + kg->SetExecOrderByDefault(); + for (auto &child_graph : kg->child_graph_order()) { + ChildGraphDataAssign(NOT_NULL(child_graph), link_list, memo); } } @@ -325,7 +471,7 @@ void AscendControlParser::InsertDependToGraph(NotNull kg, NotNul std::vector inputs = {NewValueNode(std::make_shared(prim::kPrimDepend->name())), return_node->input(kFirstDataInputIndex), attch_node.get()}; auto depend_node = kg->NewCNode(inputs); - return_node->set_input(1, depend_node); + return_node->set_input(kFirstDataInputIndex, depend_node); } void AscendControlParser::InsertControlDependToGraph(NotNull kg, NotNull first_node, @@ -381,6 +527,7 @@ void AscendControlParser::RecurseCall(NotNull kg, NotNullset_inputs(new_inputs); cur_node->set_abstract(nullptr); + AnfAlgo::SetNodeAttr(kAttrChildGraph, MakeValue>({call_kg}), cur_node.get()); MS_LOG(INFO) << "Succeed processing call func " << cur_node->DebugString(); } @@ -409,9 +556,12 @@ void AscendControlParser::RecurseSwitch(NotNull kg, NotNull new_switch_inputs = { std::make_shared(std::make_shared(kLabelSwitchOpName)), origin_switch_inputs[kCNodeSwitchCond]}; + std::vector child_graphs; for (size_t i = kCNodeSwitchCond + 1; i < kCNodeSwitchLength; ++i) { // 3.1 branch kernel graph and args - KernelGraphPtr branch_fg = 
ParsePartial(NOT_NULL(origin_switch_inputs[i])); + KernelGraphPtr branch_fg; + std::tie(branch_fg, std::ignore) = ParsePartial(NOT_NULL(origin_switch_inputs[i])); + child_graphs.push_back(branch_fg); // 3.2 recurse sub graph CNodePtr branch_label = ProcessKernelGraph(NOT_NULL(branch_fg), cur_node, back_label, memo); new_switch_inputs.push_back(branch_label); @@ -420,6 +570,7 @@ void AscendControlParser::RecurseSwitch(NotNull kg, NotNullset_inputs(new_switch_inputs); cur_node->set_abstract(nullptr); + AnfAlgo::SetNodeAttr(kAttrChildGraph, MakeValue>(child_graphs), cur_node.get()); MS_LOG(INFO) << "Succeed processing switch func " << cur_node->DebugString(); } @@ -453,9 +604,12 @@ void AscendControlParser::RecurseSwitchLayer(NotNull kg, NotNull std::vector new_switch_inputs = { std::make_shared(std::make_shared(kLabelSwitchOpName)), origin_switch_inputs[kCNodeSwitchCond]}; + std::vector child_graphs; for (size_t i = 0; i < branch_partial.size(); ++i) { // 3.1 branch kernel graph and args - KernelGraphPtr branch_fg = ParsePartial(NOT_NULL(origin_switch_inputs[i])); + KernelGraphPtr branch_fg; + std::tie(branch_fg, std::ignore) = ParsePartial(NOT_NULL(origin_switch_inputs[i])); + child_graphs.push_back(branch_fg); // 3.2 recurse sub graph CNodePtr branch_label = ProcessKernelGraph(NOT_NULL(branch_fg), cur_node, back_label, memo); new_switch_inputs.push_back(branch_label); @@ -463,13 +617,14 @@ void AscendControlParser::RecurseSwitchLayer(NotNull kg, NotNull new_switch_inputs.insert(new_switch_inputs.end(), branch_partial.begin(), branch_partial.end()); cur_node->set_inputs(new_switch_inputs); cur_node->set_abstract(nullptr); + AnfAlgo::SetNodeAttr(kAttrChildGraph, MakeValue>(child_graphs), cur_node.get()); MS_LOG(INFO) << "Succeed processing switch layer " << cur_node->DebugString(); } -KernelGraphPtr AscendControlParser::ParsePartial(NotNull node) { +std::tuple> AscendControlParser::ParsePartial(NotNull node) { if (!node.get()->isa()) { if (IsValueNode(node)) { - 
return GetValueNode(node); + return {GetValueNode(node), {}}; } MS_LOG(EXCEPTION) << "Switch branches must be partial, node: " << node->DebugString(); } @@ -485,12 +640,11 @@ KernelGraphPtr AscendControlParser::ParsePartial(NotNull node) { MS_LOG(EXCEPTION) << "Index out of range:" << partial_inputs.size() << "."; } auto branch_kg = GetValueNode(partial_inputs[kCNodePartialFunc]); - return branch_kg; + return {branch_kg, std::vector(partial_inputs.begin() + kCNodePartialFunc + 1, partial_inputs.end())}; } -void AscendControlParser::InsertMultipleAssignToGraph(NotNull from_graph, - NotNull to_graph, NotNull from, - NotNull to) { +void AscendControlParser::InsertMultipleAssignToGraph(NotNull from_graph, const AnfNodePtr &jump_node, + NotNull from, NotNull to) { std::vector from_outputs = AnfAlgo::GetAllOutput(from, {prim::kPrimTupleGetItem}); std::vector to_outputs = AnfAlgo::GetAllOutput(to, {prim::kPrimTupleGetItem}); MS_LOG(INFO) << "Insert multi-assign from [" << from->DebugString() << "] to [" << to->DebugString() << "]"; @@ -500,22 +654,35 @@ void AscendControlParser::InsertMultipleAssignToGraph(NotNull fr } for (size_t i = 0; i < from_outputs.size(); i++) { auto assign_node = InsertAssignToGraph(from_graph, NOT_NULL(from_outputs[i]), NOT_NULL(to_outputs[i])); - if (assign_node != nullptr) { - auto jump_node = GetJumpNode(from_graph, to_graph); - const auto &from_graph_exe_order = from_graph->execution_order(); - auto jump_node_iter = std::find(from_graph_exe_order.begin(), from_graph_exe_order.end(), jump_node); - if (jump_node_iter == from_graph_exe_order.end()) { - MS_EXCEPTION_IF_NULL(jump_node); - MS_LOG(EXCEPTION) << "Can't find node:" << jump_node->DebugString() << " in graph:" << from_graph->graph_id(); - } - // insert assign between jump_node -1 and jump_node - if (jump_node_iter != from_graph_exe_order.begin()) { - InsertControlDependToGraph(from_graph, NOT_NULL(*(jump_node_iter - 1)), NOT_NULL(assign_node)); - } - if (jump_node != nullptr) { - 
InsertControlDependToGraph(from_graph, NOT_NULL(assign_node), NOT_NULL(jump_node)); + const auto &from_graph_exe_order = from_graph->execution_order(); + std::vector real_exe_order(from_graph_exe_order.size()); + size_t real_exe_order_size = 0; + std::copy_if(from_graph_exe_order.begin(), from_graph_exe_order.end(), real_exe_order.begin(), + [&real_exe_order_size](const CNodePtr &node) -> bool { + return (IsPrimitiveCNode(node, prim::kPrimSwitch) || IsPrimitiveCNode(node, prim::kPrimPartial)) + ? false + : (++real_exe_order_size, true); + }); + real_exe_order.resize(real_exe_order_size); + if (jump_node == nullptr) { + if (!real_exe_order.empty()) { + InsertControlDependToGraph(from_graph, NOT_NULL(*(real_exe_order.rbegin())), NOT_NULL(assign_node)); + } else { + InsertDependToGraph(from_graph, NOT_NULL(assign_node)); } + continue; } + + auto jump_node_iter = std::find(real_exe_order.begin(), real_exe_order.end(), jump_node); + if (jump_node_iter == real_exe_order.end()) { + MS_LOG(EXCEPTION) << "Cannot find jump node " << jump_node->DebugString() << " in graph " + << from_graph->ToString(); + } + // insert assign between jump_node -1 and jump_node + if (jump_node_iter != real_exe_order.begin()) { + InsertControlDependToGraph(from_graph, NOT_NULL(*(jump_node_iter - 1)), NOT_NULL(assign_node)); + } + InsertControlDependToGraph(from_graph, NOT_NULL(assign_node), NOT_NULL(jump_node)); } } @@ -618,26 +785,45 @@ bool AscendControlParser::CheckLabelIndex(uint32_t order_index, uint32_t label_i } } -void AscendControlParser::UpdateChildGraphOrder(NotNull kg) { - MS_LOG(INFO) << "Graph id:" << kg->graph_id(); - kg->SetExecOrderByDefault(); - auto call_nodes = kg->FindNodeByPrimitive(std::make_shared(prim::kPrimCall->name())); - std::vector child_graph_order; - for (auto &call_node : call_nodes) { - MS_EXCEPTION_IF_NULL(call_node); - auto call_child_graphs = AnfAlgo::GetCallNodeKernelGraph(call_node->cast()); - for (const auto &child_graph : call_child_graphs) { - 
MS_EXCEPTION_IF_NULL(child_graph); - if (child_graph != kg->parent_graph()) { - child_graph->set_parent_graph(kg.get()); - } - child_graph_order.push_back(child_graph); - } +void AscendControlParser::ReferenceCounter::AddReadCount(const AnfNodePtr &key, int32_t num) { + auto iter = count_.find(key); + if (iter != count_.end()) { + iter->second.first += num; + } else { + count_[key] = {num, 0}; } - for (size_t i = 0; i < child_graph_order.size(); i++) { - MS_LOG(INFO) << "Child graph[" << i << "][id:" << child_graph_order[i]->graph_id() << "]"; +} + +void AscendControlParser::ReferenceCounter::AddWriteCount(const AnfNodePtr &key, int32_t num) { + auto iter = count_.find(key); + if (iter != count_.end()) { + iter->second.second += num; + } else { + count_[key] = {0, num}; } - kg->set_child_graph_order(child_graph_order); +} + +void AscendControlParser::ReferenceCounter::EraseElem(const AnfNodePtr &key) { count_.erase(key); } + +bool AscendControlParser::ReferenceCounter::HasValidElem() const { + auto it = std::find_if(count_.begin(), count_.end(), + [this](const std::pair> &p) -> bool { + auto &[read, written] = p.second; + return predicate_(read, written); + }); + return it != count_.end(); +} + +std::tuple AscendControlParser::ReferenceCounter::GetOneValidElem() const { + auto it = std::find_if(count_.begin(), count_.end(), + [this](const std::pair> &p) -> bool { + auto &[read, written] = p.second; + return predicate_(read, written); + }); + if (it == count_.end()) { + MS_LOG(EXCEPTION) << "No valid parameter."; + } + return {it->first, it->second.first, it->second.second}; } } // namespace session } // namespace mindspore diff --git a/mindspore/ccsrc/session/ascend_control_parser.h b/mindspore/ccsrc/backend/session/ascend_control_parser.h similarity index 70% rename from mindspore/ccsrc/session/ascend_control_parser.h rename to mindspore/ccsrc/backend/session/ascend_control_parser.h index 7530f2019ea..ac247351390 100644 --- 
a/mindspore/ccsrc/session/ascend_control_parser.h +++ b/mindspore/ccsrc/backend/session/ascend_control_parser.h @@ -20,7 +20,9 @@ #include #include #include -#include "session/kernel_graph.h" +#include +#include +#include "backend/session/kernel_graph.h" #include "utils/base_ref.h" #include "utils/contract.h" #include "utils/union_find_set.h" @@ -29,16 +31,23 @@ namespace mindspore { namespace session { class AscendControlParser { public: - static void ChildGraphDataAssign(const std::map &graph_id_map); static void LinkGraph(NotNull kg); static void InsertDependToGraph(NotNull kg, NotNull attch_node); static void InsertControlDependToGraph(NotNull kg, NotNull first_node, NotNull second_node); static void ExecutorValidate(NotNull root_graph); - static void UpdateChildGraphOrder(NotNull kg); + static void InsertMultipleAssignToGraph(NotNull from_graph, const AnfNodePtr &jump_node, + NotNull from, NotNull to); private: + class ReferenceCounter; + + static void EraseParameter(NotNull root_graph, const std::set &graph_list); + static void EraseLabel(NotNull root_graph); + static void ChildGraphDataAssign(NotNull kg, + const NotNull> *> link_list, + const NotNull *> memo); static NotNull GetStartLabel(NotNull kg, const CNodePtr &last_node, const CNodePtr &last_label); static NotNull ProcessKernelGraph(NotNull kg, const CNodePtr &last_node, @@ -53,11 +62,10 @@ class AscendControlParser { static void LinkParentGraph(NotNull kg, const CNodePtr &from_graph_call_node, const CNodePtr &last_label); - static KernelGraphPtr ParsePartial(NotNull node); - static void InsertMultipleAssignToGraph(NotNull from_graph, NotNull to_graph, - NotNull from, NotNull to); static AnfNodePtr InsertAssignToGraph(NotNull kg, NotNull from, NotNull to); + static std::vector>> ParseCallNode(NotNull call_node); + static std::tuple> ParsePartial(NotNull node); // root graph order static bool CheckLabelIndex(uint32_t order_index, uint32_t label_index, const CNodePtr &cnode, @@ -65,6 +73,19 @@ class 
AscendControlParser { static std::vector RecurseGraph(NotNull graph, const NotNull *> memo); }; +class AscendControlParser::ReferenceCounter { + public: + explicit ReferenceCounter(std::function func) : predicate_(func), count_() {} + void AddReadCount(const AnfNodePtr &key, int32_t num); + void AddWriteCount(const AnfNodePtr &key, int32_t num); + void EraseElem(const AnfNodePtr &key); + bool HasValidElem() const; + std::tuple GetOneValidElem() const; + + private: + std::function predicate_; + std::map> count_; +}; } // namespace session } // namespace mindspore diff --git a/mindspore/ccsrc/session/ascend_inference_session.cc b/mindspore/ccsrc/backend/session/ascend_inference_session.cc similarity index 50% rename from mindspore/ccsrc/session/ascend_inference_session.cc rename to mindspore/ccsrc/backend/session/ascend_inference_session.cc index aef7738d0b1..d251eb20398 100644 --- a/mindspore/ccsrc/session/ascend_inference_session.cc +++ b/mindspore/ccsrc/backend/session/ascend_inference_session.cc @@ -13,81 +13,21 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "session/ascend_inference_session.h" -#include "operator/ops.h" +#include "backend/session/ascend_inference_session.h" +#include "frontend/operator/ops.h" #include "ir/tensor.h" -#include "ir/tensor_py.h" #include "ir/anf.h" -#include "ir/param_value_py.h" -#include "device/kernel_runtime.h" -#include "session/anf_runtime_algorithm.h" +#include "ir/param_value.h" +#include "runtime/device/kernel_runtime.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" #include "common/trans.h" -#include "kernel/tbe/tbe_python_funcs.h" +#include "backend/kernel_compiler/tbe/tbe_python_funcs.h" #include "utils/config_manager.h" #include "utils/base_ref_extends.h" -using mindspore::tensor::TensorPy; - namespace mindspore { namespace session { -namespace { -std::set weight_infos; -static TypeId GetDataType(const py::buffer_info &buf) { - if (buf.format.size() == 1) { - switch (buf.format.front()) { - case 'e': - case 'f': - case 'd': - switch (buf.itemsize) { - case 2: - return TypeId::kNumberTypeFloat16; - case 4: - return TypeId::kNumberTypeFloat32; - case 8: - return TypeId::kNumberTypeFloat64; - } - break; - case 'b': - case 'h': - case 'i': - case 'l': - case 'q': - switch (buf.itemsize) { - case 1: - return TypeId::kNumberTypeInt8; - case 2: - return TypeId::kNumberTypeInt16; - case 4: - return TypeId::kNumberTypeInt32; - case 8: - return TypeId::kNumberTypeInt64; - } - break; - case 'B': - case 'H': - case 'I': - case 'L': - case 'Q': - switch (buf.itemsize) { - case 1: - return TypeId::kNumberTypeUInt8; - case 2: - return TypeId::kNumberTypeUInt16; - case 4: - return TypeId::kNumberTypeUInt32; - case 8: - return TypeId::kNumberTypeUInt64; - } - break; - case '?': - return TypeId::kNumberTypeBool; - } - } - MS_LOG(WARNING) << "Unsupported DataType format " << buf.format << " item size " << buf.itemsize; - return TypeId::kTypeUnknown; -} -} // namespace void AscendInferenceSession::LoadInputData(const std::shared_ptr &kernel_graph, 
const std::vector &inputs_const) const { MS_EXCEPTION_IF_NULL(kernel_graph); @@ -105,23 +45,7 @@ void AscendInferenceSession::LoadInputData(const std::shared_ptr &k MS_EXCEPTION_IF_NULL(pk_node); auto device_address = AnfAlgo::GetMutableOutputAddr(pk_node, 0); MS_EXCEPTION_IF_NULL(device_address); - if (AnfAlgo::IsParameterWeight(pk_node)) { - if (weight_infos.count(pk_node) != 0) { - continue; - } - auto param_value = std::dynamic_pointer_cast(pk_node->default_param()); - MS_EXCEPTION_IF_NULL(param_value); - auto py_param = param_value->value(); - MS_EXCEPTION_IF_NULL(py_param); - py::array py_array = py_param.cast(); - py::buffer_info buf = py_array.request(); - auto buf_type = GetDataType(buf); - if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(pk_node, 0), - LongToSize(buf.size * buf.itemsize), buf_type, buf.ptr)) { - MS_LOG(EXCEPTION) << "SyncHostToDevice failed."; - } - weight_infos.insert(pk_node); - } else { + if (!AnfAlgo::IsParameterWeight(pk_node)) { tensor = inputs[no_weight_input++]; if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(pk_node, 0), LongToSize(tensor->data().nbytes()), tensor->data_type(), @@ -131,5 +55,35 @@ void AscendInferenceSession::LoadInputData(const std::shared_ptr &k } } } + +GraphId AscendInferenceSession::CompileGraph(NotNull func_graph) { + auto graph_id = AscendSession::CompileGraph(func_graph); + auto kernel_graph = GetGraph(graph_id); + MS_EXCEPTION_IF_NULL(kernel_graph); + // load weight data to device + auto input_nodes = kernel_graph->inputs(); + for (size_t i = 0; i < input_nodes.size(); ++i) { + if (!input_nodes[i]->isa()) { + MS_LOG(ERROR) << "Kernel graph inputs have anfnode which is not Parameter"; + continue; + } + auto pk_node = input_nodes[i]->cast(); + MS_EXCEPTION_IF_NULL(pk_node); + auto device_address = AnfAlgo::GetMutableOutputAddr(pk_node, 0); + MS_EXCEPTION_IF_NULL(device_address); + if (AnfAlgo::IsParameterWeight(pk_node)) { + const auto ¶m_value = 
pk_node->default_param(); + MS_EXCEPTION_IF_NULL(param_value); + auto tensor = std::dynamic_pointer_cast(param_value->value()); + MS_EXCEPTION_IF_NULL(tensor); + if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(pk_node, 0), + LongToSize(tensor->data().nbytes()), tensor->data_type(), + tensor->data_c())) { + MS_LOG(EXCEPTION) << "SyncHostToDevice failed."; + } + } + } + return graph_id; +} } // namespace session } // namespace mindspore diff --git a/mindspore/ccsrc/session/ascend_inference_session.h b/mindspore/ccsrc/backend/session/ascend_inference_session.h similarity index 82% rename from mindspore/ccsrc/session/ascend_inference_session.h rename to mindspore/ccsrc/backend/session/ascend_inference_session.h index 53be881f93d..5364ae8d4ee 100644 --- a/mindspore/ccsrc/session/ascend_inference_session.h +++ b/mindspore/ccsrc/backend/session/ascend_inference_session.h @@ -24,11 +24,11 @@ #include #include #include -#include "session/ascend_session.h" -#include "session/kernel_graph.h" -#include "kernel/kernel.h" -#include "session/session_factory.h" -#include "session/ascend_control_parser.h" +#include "backend/session/ascend_session.h" +#include "backend/session/kernel_graph.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/session_factory.h" +#include "backend/session/ascend_control_parser.h" namespace mindspore { namespace session { @@ -38,6 +38,7 @@ class AscendInferenceSession : public AscendSession { ~AscendInferenceSession() = default; void LoadInputData(const std::shared_ptr &kernel_graph, const std::vector &inputs_const) const; + GraphId CompileGraph(NotNull func_graph) override; }; MS_REG_SESSION(kDavinciInferenceDevice, AscendInferenceSession); } // namespace session diff --git a/mindspore/ccsrc/session/ascend_session.cc b/mindspore/ccsrc/backend/session/ascend_session.cc similarity index 86% rename from mindspore/ccsrc/session/ascend_session.cc rename to mindspore/ccsrc/backend/session/ascend_session.cc index 
f361cb26ca1..75bc4e2d058 100644 --- a/mindspore/ccsrc/session/ascend_session.cc +++ b/mindspore/ccsrc/backend/session/ascend_session.cc @@ -13,37 +13,37 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "session/ascend_session.h" +#include "backend/session/ascend_session.h" #include #include #include #include #include #include -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "ir/tensor.h" #include "ir/anf.h" #include "common/trans.h" -#include "device/kernel_runtime.h" -#include "device/ascend/kernel_select_ascend.h" -#include "device/ascend/kernel_build_ascend.h" -#include "device/ascend/ascend_kernel_runtime.h" -#include "device/ascend/ascend_device_address.h" -#include "pre_activate/ascend/ascend_backend_optimization.h" -#include "pre_activate/common/common_backend_optimization.h" -#include "device/kernel_adjust.h" -#include "device/ascend/ascend_stream_assign.h" -#include "device/ascend/ascend_label_assign.h" +#include "runtime/device/kernel_runtime.h" +#include "runtime/device/ascend/kernel_select_ascend.h" +#include "runtime/device/ascend/kernel_build_ascend.h" +#include "runtime/device/ascend/ascend_kernel_runtime.h" +#include "runtime/device/ascend/ascend_device_address.h" +#include "backend/optimizer/ascend/ascend_backend_optimization.h" +#include "backend/optimizer/common/common_backend_optimization.h" +#include "runtime/device/kernel_adjust.h" +#include "runtime/device/ascend/ascend_stream_assign.h" +#include "runtime/device/ascend/ascend_label_assign.h" #include "predict/predict.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "ir/scalar.h" #include "debug/anf_ir_dump.h" #include "debug/anf_ir_utils.h" #include "debug/draw.h" #include "common/utils.h" -#include "pre_activate/common/helper.h" -#include "device/kernel_runtime_manager.h" -#include "kernel/tbe/tbe_python_funcs.h" +#include 
"backend/optimizer/common/helper.h" +#include "runtime/device/kernel_runtime_manager.h" +#include "backend/kernel_compiler/tbe/tbe_python_funcs.h" #include "utils/config_manager.h" #include "utils/base_ref_extends.h" #include "debug/tensor_load.h" @@ -51,6 +51,7 @@ namespace mindspore { namespace session { const size_t kInvalidIndex = SIZE_MAX; +constexpr size_t kReturnDataIndex = 1; namespace { void DumpGraphExeOrder(const std::vector &execution_order, const std::string &tag = "") { MS_LOG(INFO) << "Dump execution_order size " << execution_order.size(); @@ -288,6 +289,17 @@ static void RecurseToUpdateCallRealInput(NotNull graph, // this action should from bottom to top graph->UpdateCallRealInput(); } + +void InsertMakeTupleForOutput(NotNull root_graph) { + auto return_node = root_graph->get_return(); + MS_EXCEPTION_IF_NULL(return_node); + if (return_node->size() <= kReturnDataIndex) { + return; + } + auto make_tuple = root_graph->NewCNode( + {NewValueNode(std::make_shared(prim::kPrimMakeTuple->name())), root_graph->output()}); + root_graph->set_output(make_tuple); +} } // namespace GraphId AscendSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList &outputs) { @@ -307,19 +319,36 @@ GraphId AscendSession::CompileGraph(NotNull func_graph) { // empty graph dont entry to backend if (root_graph->execution_order().empty()) { MS_LOG(INFO) << root_graph->ToString() << " is empty graph."; + InsertMakeTupleForOutput(NOT_NULL(root_graph)); root_graph->set_executable(false); InitRuntimeResource(); return root_graph->graph_id(); } - // split switch - SplitGraphs(NOT_NULL(root_graph)); + // create parameter for multiple branch + std::set memo; + CreateMultiBranchOutput(NOT_NULL(root_graph), NOT_NULL(&memo)); + memo.clear(); // insert goto labels and label_sets LinkChildGraphs(NOT_NULL(root_graph)); // resource initialize InitRuntimeResource(); - // recurse compile child root_graph - std::set memo; - RecurseCompileGraph(NOT_NULL(root_graph), NOT_NULL(&memo)); + + 
IrFusionPass(NOT_NULL(root_graph), NOT_NULL(&memo)); + memo.clear(); + + SelectKernel(NOT_NULL(root_graph)); + memo.clear(); + + HardwareOptimize(NOT_NULL(root_graph), NOT_NULL(&memo)); + memo.clear(); + + AssignStaticMemory(NOT_NULL(root_graph), NOT_NULL(&memo)); + memo.clear(); + + UpdateRefOutputMap(NOT_NULL(root_graph), NOT_NULL(&memo)); + memo.clear(); + // add make_tuple to the output graph + InsertMakeTupleForOutput(NOT_NULL(root_graph)); // root root_graph valiate,include genearte execute order and so on RootGraphExecutorValidate(NOT_NULL(root_graph)); // adjust kernel @@ -330,12 +359,18 @@ GraphId AscendSession::CompileGraph(NotNull func_graph) { device::KernelAdjust::GetInstance().Profiling(NOT_NULL(root_graph.get())); // build kernel BuildKernel(root_graph); +#ifdef ENABLE_DEBUGGER + if (debugger_) { + debugger_->PreExecute(root_graph); + } +#endif // alloc mem MemoryAlloc(root_graph.get()); // task generate GenerateTaskInfo(root_graph); // load task into device LoadTask(root_graph); + DumpAllGraphs(all_graphs); // return the root_graph id to backend auto graph_id = root_graph->graph_id(); return graph_id; @@ -405,6 +440,11 @@ void AscendSession::BuildGraph(GraphId graph_id) { BuildKernel(graph); auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); +#ifdef ENABLE_DEBUGGER + if (debugger_) { + debugger_->PreExecute(graph); + } +#endif if (ms_context->precompile_only()) { MS_LOG(INFO) << "Precompile only, stop in build kernel step"; } else { @@ -417,7 +457,7 @@ void AscendSession::BuildGraph(GraphId graph_id) { } // sync the inital const tensor to device SyncInitialTenosrToDevice(); - ExportChildGraphs(graph_id); + DumpAllGraphs({graph}); MS_LOG(INFO) << "End"; } @@ -473,12 +513,6 @@ void AscendSession::RunGraph(const GraphId &graph_id, const std::vectorPreExecute(kernel_graph); - } -#endif { py::gil_scoped_release release; // run task on device @@ -761,7 +795,7 @@ void AscendSession::Dump(const std::shared_ptr &kernel_graph) const 
MS_LOG(INFO) << "Finish!"; } -void AscendSession::ExportChildGraphs(const GraphId graph_id) { +void AscendSession::DumpAllGraphs(const std::vector &all_graphs) { #ifdef ENABLE_DUMP_IR auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); @@ -773,21 +807,11 @@ void AscendSession::ExportChildGraphs(const GraphId graph_id) { if (save_graphs_path.empty()) { save_graphs_path = "."; } - if (graph_id == final_graph_id_) { - const auto &graph_order = GetGraphOrder(final_graph_id_); - const auto &graph_type = GetGraphOrderType(final_graph_id_); - for (size_t i = 0; i < graph_order.size(); i++) { - if (graph_type[i] == BRANCH_END || graph_type[i] == BRANCH_START) { - continue; - } - const auto child_graph = GetGraph(graph_order[i]); - MS_LOG(DEBUG) << "Start export child graph " << graph_order[i]; - MS_EXCEPTION_IF_NULL(child_graph); - std::string file_path = save_graphs_path + "/graph_build_" + std::to_string(child_graph->graph_id()) + ".ir"; - DumpIR(file_path, child_graph, true); - DumpIRProto(child_graph, "vm_build_" + std::to_string(child_graph->graph_id())); - MS_LOG(DEBUG) << "End export child graph " << graph_order[i]; - } + for (auto &graph : all_graphs) { + MS_EXCEPTION_IF_NULL(graph); + std::string file_path = save_graphs_path + "/graph_build_" + std::to_string(graph->graph_id()) + ".ir"; + DumpIR(file_path, graph, true); + DumpIRProto(graph, "vm_build_" + std::to_string(graph->graph_id())); } #endif } @@ -798,12 +822,14 @@ void AscendSession::LoadTensor(const std::shared_ptr &kernel_graph) #ifdef ENABLE_DEBUGGER auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_); MS_EXCEPTION_IF_NULL(runtime_instance); - DebugServices *debug_services = debugger_->get_debug_services(); - TensorLoader *tensor_loader = debug_services->get_tensor_loader(); + DebugServices *debug_services = debugger_->debug_services(); + TensorLoader *tensor_loader = debug_services->tensor_loader(); + // TensorData 
will be freed up here tensor_loader->EmptyTensor(); uint32_t iter_num = tensor_loader->GetIterNum(); tensor_loader->set_iter_num(++iter_num); (void)runtime_instance->LoadData(kernel_graph.get(), debugger_.get()); + tensor_loader->EmptyPrevTensor(); #endif MS_LOG(INFO) << "Finish!"; } @@ -1027,7 +1053,7 @@ void AscendSession::InsertSwitchToGraph(GraphId condition_graph_id, GraphId true // append switch at the end of condition graph auto return_node = condition_graph->get_return(); MS_EXCEPTION_IF_NULL(return_node); - InsertControlDependToGraph(condition_graph_id, return_node->input(1), switch_node); + InsertControlDependToGraph(condition_graph_id, return_node->input(kReturnDataIndex), switch_node); MS_LOG(INFO) << "Finish!"; } @@ -1477,7 +1503,7 @@ void AscendSession::InsertStreamActiveToGraph(GraphId graph_id, uint32_t actived // append the active node at the end of from graph auto return_node = from_graph->get_return(); MS_EXCEPTION_IF_NULL(return_node); - InsertControlDependToGraph(graph_id, return_node->input(1), active_node); + InsertControlDependToGraph(graph_id, return_node->input(kReturnDataIndex), active_node); } void AscendSession::InsertDependToGraph(GraphId graph_id, const AnfNodePtr &attch_node) { @@ -1630,6 +1656,10 @@ void AscendSession::BackendOptimization(const std::vector &all_g void AscendSession::SplitGraphs(NotNull root_graph) { std::set memo; + // if output of graph is nullptr,no need insert maketuple at the end of graph + if (root_graph->output() == nullptr) { + return; + } // if root graph output is a call node ,the root graph is condition graph of 'if' sentence auto root_graph_output = AnfAlgo::VisitKernelWithReturnType(root_graph->output(), 0).first; if (AnfAlgo::CheckPrimitiveType(root_graph_output, prim::kPrimCall)) { @@ -1680,7 +1710,7 @@ void AscendSession::SplitGraph(NotNull graph, const std::setget_return())); // update the root graph child graph order - AscendControlParser::UpdateChildGraphOrder(graph); + 
graph->UpdateChildGraphOrder(); // get child list from current graph std::vector> child_graph_lists = GetChildList(apply_list, cut_prims); if (child_graph_lists.size() > 1) { @@ -1712,7 +1742,7 @@ void AscendSession::SplitGraph(NotNull graph, const std::setUpdateChildGraphOrder(); UpdateRealInput(graph, split_flag, memo); MS_LOG(INFO) << "Split graph[" << graph->graph_id() << "] end"; } @@ -1751,5 +1781,216 @@ void AscendSession::RecurseCompileGraph(NotNull graph, const Not } } } + +void AscendSession::CreateMultiBranchOutput(NotNull graph, NotNull *> memo) { + if (memo->find(graph.get()) != memo->end()) { + return; + } + memo->insert(graph.get()); + + graph->UpdateChildGraphOrder(); + for (auto &child_graph : graph->child_graph_order()) { + CreateMultiBranchOutput(NOT_NULL(child_graph), memo); + } + + std::map need_replace_list; + auto node_list = GetCNodes(TopoSort(graph->get_return())); + for (auto &node : node_list) { + if (AnfAlgo::CheckPrimitiveType(node, prim::kPrimCall)) { + // create a parameter to store the output of multiple branch and set the parameter as the condition graph's output + // auto multi_output_param = graph->NewParameter(); + auto origin_inputs = graph->inputs(); + auto output_param = CreateNewParameterFromCNode(node, true, graph.get().get()); + MS_EXCEPTION_IF_NULL(graph->MutableInputs()); + graph->MutableInputs()->operator=(origin_inputs); + graph->AddChildGraphResult(output_param); + + std::vector depend_inputs = { + graph->NewValueNode(NewValueNode(std::make_shared(prim::kPrimDepend->name()))), output_param, node}; + auto depend = graph->NewCNode(depend_inputs); + need_replace_list.emplace(node, depend); + MS_LOG(INFO) << "Create parameter " << output_param->DebugString() << " for call node " << node->DebugString() + << ", depend node is " << depend->DebugString(); + // insert assign in order to transfer child graph output to parameter + auto child_graphs = AnfAlgo::GetCallNodeKernelGraph(node); + for (auto &child_graph : child_graphs) 
{ + MS_EXCEPTION_IF_NULL(child_graph); + if (child_graph->get_output_null()) { + continue; + } + auto graph_output = child_graph->output(); + AscendControlParser::InsertMultipleAssignToGraph(NOT_NULL(child_graph), nullptr, NOT_NULL(graph_output), + NOT_NULL(output_param)); + } + } + } + // searching for nodes' input to replace call by depend(parameter, call) + for (auto &node : node_list) { + for (size_t i = 0; i < node->size(); ++i) { + auto input = node->input(i); + auto iter = need_replace_list.find(input); + if (iter != need_replace_list.end()) { + node->set_input(i, iter->second); + } + } + } +} + +void AscendSession::IrFusionPass(const NotNull graph, NotNull *> memo) { + if (memo->find(graph) != memo->end()) { + return; + } + memo->insert(graph.get()); + + opt::AscendBackendIRFusionOptimization(graph); + opt::AscendBackendFuseBasicOpt(graph, true); + opt::AscendBackendGraphKernelOpt(graph, true); + graph->SetExecOrderByDefault(); + + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + bool save_graphs = context_ptr->save_graphs_flag(); + auto save_graphs_path = context_ptr->save_graphs_path(); + if (save_graphs) { + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } + std::string file_path = + save_graphs_path + "/" + "select_kernel_before" + "_graph_" + std::to_string(graph->graph_id()) + ".ir"; + DumpIR(file_path, graph.get()); + } + + for (auto &child_graph : graph->child_graph_order()) { + IrFusionPass(NOT_NULL(child_graph), memo); + } +} + +void AscendSession::SelectKernel(NotNull root_graph) { + MS_LOG(INFO) << "Start select kernel."; + size_t raise_precision_count = 0; + size_t reduce_precision_count = 0; + + std::set memo; + (void)RecurseSelectKernelInfo(root_graph, NOT_NULL(&memo), &raise_precision_count, &reduce_precision_count); + memo.clear(); + + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + if (ms_context->execution_mode() == kGraphMode) { + if (raise_precision_count 
> 0) { + MS_LOG(WARNING) << "There has " << raise_precision_count + << " node/nodes used raise precision to selected the kernel!"; + } + if (reduce_precision_count > 0) { + MS_LOG(WARNING) << "There has " << raise_precision_count + << " node/nodes used reduce precision to selected the kernel!"; + } + } + MS_LOG(INFO) << "Finish!"; +} + +void AscendSession::RecurseSelectKernelInfo(NotNull graph, + NotNull *> const memo, + size_t *const raise_precision_count, + size_t *const reduce_precision_count) const { + if (memo->find(graph) != memo->end()) { + return; + } + memo->insert(graph.get()); + MS_LOG(INFO) << "Start to select kernel info in graph: " << graph->graph_id(); + + for (const auto &cnode : graph->execution_order()) { + if (AnfAlgo::IsCondControlKernel(cnode)) { + std::vector child_graphs; + if (AnfAlgo::HasNodeAttr(kAttrChildGraph, cnode)) { + child_graphs = AnfAlgo::GetNodeAttr>(cnode, kAttrChildGraph); + } + for (auto &child_graph : child_graphs) { + RecurseSelectKernelInfo(NOT_NULL(child_graph), memo, raise_precision_count, reduce_precision_count); + } + } + + auto status = device::ascend::SelectKernelInfo(cnode); + if (status == device::ascend::kStatusRaisePrecision) { + (*raise_precision_count)++; + } else if (status == device::ascend::kStatusReducePrecision) { + (*reduce_precision_count)++; + } + MS_LOG(INFO) << "Select ApplyKernel: " << cnode->DebugString(); + } + + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + bool save_graphs = context_ptr->save_graphs_flag(); + auto save_graphs_path = context_ptr->save_graphs_path(); + if (save_graphs) { + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } + std::string file_path = + save_graphs_path + "/" + "select_kernel_after" + "_graph_" + std::to_string(graph->graph_id()) + ".ir"; + DumpIR(file_path, graph.get()); + } + MS_LOG(INFO) << "Finish selecting kernel info in graph: " << graph->graph_id(); +} + +void AscendSession::HardwareOptimize(NotNull graph, + 
NotNull *> const memo) const { + if (memo->find(graph) != memo->end()) { + return; + } + memo->insert(graph.get()); + + MS_LOG(INFO) << "Start to do HardwareOptimize in graph: " << graph->graph_id(); + // convert kernel Graph to model + predictmodel::StepConvertGraph(graph.get()); + + HardwareOptimize(graph.get()); + for (auto &child_graph : graph->child_graph_order()) { + HardwareOptimize(NOT_NULL(child_graph), memo); + } + MS_LOG(INFO) << "Finish doing HardwareOptimize in graph: " << graph->graph_id(); +} + +void AscendSession::AssignStaticMemory(NotNull graph, + NotNull *> const memo) const { + if (memo->find(graph) != memo->end()) { + return; + } + memo->insert(graph.get()); + + MS_LOG(INFO) << "Start to assign static memory for parameter in graph: " << graph->graph_id(); + // assign static memory for parameters + auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_); + MS_EXCEPTION_IF_NULL(runtime_instance); + runtime_instance->AssignStaticMemoryInput(graph.get().get()); + runtime_instance->AssignStaticMemoryValueNode(graph.get().get()); + for (auto &child_graph : graph->child_graph_order()) { + AssignStaticMemory(NOT_NULL(child_graph), memo); + } + MS_LOG(INFO) << "Finish assigning static memory for parameter in graph: " << graph->graph_id(); +} + +void AscendSession::UpdateRefOutputMap(NotNull graph, + NotNull *> const memo) const { + if (memo->find(graph) != memo->end()) { + return; + } + memo->insert(graph.get()); + + for (auto &child_graph : graph->child_graph_order()) { + UpdateRefOutputMap(NOT_NULL(child_graph), memo); + // copy ref map to final graph + auto child_ref_map = child_graph->GetRefMap(); + for (auto &item : child_ref_map) { + if (graph->IsInRefOutputMap(item.first)) { + MS_LOG(WARNING) << "The ref pair <" << item.first.first->DebugString() << ", " << item.first.second + << "> is already in " << graph->ToString(); + continue; + } + graph->AddRefCorrespondPairs(item.first, item.second); + 
} + } +} } // namespace session } // namespace mindspore diff --git a/mindspore/ccsrc/session/ascend_session.h b/mindspore/ccsrc/backend/session/ascend_session.h similarity index 88% rename from mindspore/ccsrc/session/ascend_session.h rename to mindspore/ccsrc/backend/session/ascend_session.h index 531860c3796..11cb1c92d26 100755 --- a/mindspore/ccsrc/session/ascend_session.h +++ b/mindspore/ccsrc/backend/session/ascend_session.h @@ -24,11 +24,11 @@ #include #include #include -#include "session/session_basic.h" -#include "session/kernel_graph.h" -#include "kernel/kernel.h" -#include "session/session_factory.h" -#include "session/ascend_control_parser.h" +#include "backend/session/session_basic.h" +#include "backend/session/kernel_graph.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/session/session_factory.h" +#include "backend/session/ascend_control_parser.h" namespace mindspore { namespace session { @@ -85,7 +85,7 @@ class AscendSession : public SessionBasic { void LoadTask(const std::shared_ptr &kernel_graph) const; void ExecTask(const std::shared_ptr &kernel_graph) const; void Dump(const std::shared_ptr &kernel_graph) const; - void ExportChildGraphs(const GraphId graph_id); + void DumpAllGraphs(const std::vector &all_graphs); void LoadTensor(const std::shared_ptr &kernel_graph) const; // below functions are used for run op void RunOpHardwareOptimize(const std::shared_ptr &kernel_graph) const; @@ -151,6 +151,15 @@ class AscendSession : public SessionBasic { // sync intial tensors' data to device void SyncInitialTenosrToDevice(); void SetFinalGraphSummaryFlag(const std::shared_ptr &kernel_graph); + // create parameter to receive data from multiple branch output + void CreateMultiBranchOutput(NotNull graph, NotNull *> memo); + void SelectKernel(NotNull root_graph); + void RecurseSelectKernelInfo(NotNull graph, NotNull *> const memo, + size_t *const raise_precision_count, size_t *const reduce_precision_count) const; + void IrFusionPass(const 
NotNull graph, NotNull *> memo); + void HardwareOptimize(const NotNull graph, NotNull *> memo) const; + void AssignStaticMemory(const NotNull graph, NotNull *> memo) const; + void UpdateRefOutputMap(const NotNull graph, NotNull *> memo) const; // member variables // key is final_graph_id,value is child graph execute order of final graph diff --git a/mindspore/ccsrc/session/cpu_session.cc b/mindspore/ccsrc/backend/session/cpu_session.cc similarity index 94% rename from mindspore/ccsrc/session/cpu_session.cc rename to mindspore/ccsrc/backend/session/cpu_session.cc index 1927df2f494..ca1c78d2066 100644 --- a/mindspore/ccsrc/session/cpu_session.cc +++ b/mindspore/ccsrc/backend/session/cpu_session.cc @@ -14,17 +14,17 @@ * limitations under the License. */ -#include "session/cpu_session.h" +#include "backend/session/cpu_session.h" #include #include "ir/tensor.h" #include "ir/anf.h" -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" #include "common/utils.h" -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_runtime.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_runtime.h" #include "predict/predict.h" -#include "kernel/cpu/cpu_kernel_factory.h" -#include "device/cpu/kernel_select_cpu.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" +#include "runtime/device/cpu/kernel_select_cpu.h" #ifdef ENABLE_DEBUGGER #include "debug/debugger/debugger.h" #endif diff --git a/mindspore/ccsrc/session/cpu_session.h b/mindspore/ccsrc/backend/session/cpu_session.h similarity index 90% rename from mindspore/ccsrc/session/cpu_session.h rename to mindspore/ccsrc/backend/session/cpu_session.h index 36b987e8405..b0dbd1cc2bb 100644 --- a/mindspore/ccsrc/session/cpu_session.h +++ b/mindspore/ccsrc/backend/session/cpu_session.h @@ -18,10 +18,10 @@ #include #include #include -#include "session/session_basic.h" -#include "session/kernel_graph.h" -#include "device/cpu/cpu_kernel_runtime.h" -#include 
"session/session_factory.h" +#include "backend/session/session_basic.h" +#include "backend/session/kernel_graph.h" +#include "runtime/device/cpu/cpu_kernel_runtime.h" +#include "backend/session/session_factory.h" namespace mindspore { namespace session { class CPUSession : public SessionBasic { diff --git a/mindspore/ccsrc/session/gpu_session.cc b/mindspore/ccsrc/backend/session/gpu_session.cc similarity index 92% rename from mindspore/ccsrc/session/gpu_session.cc rename to mindspore/ccsrc/backend/session/gpu_session.cc index 7765e937589..14e30c1a443 100644 --- a/mindspore/ccsrc/session/gpu_session.cc +++ b/mindspore/ccsrc/backend/session/gpu_session.cc @@ -13,19 +13,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "session/gpu_session.h" -#include "device/gpu/kernel_info_setter.h" -#include "device/gpu/gpu_kernel_build.h" -#include "device/gpu/gpu_kernel_runtime.h" -#include "device/gpu/gpu_stream_assign.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pass_manager.h" -#include "pre_activate/common/helper.h" -#include "pre_activate/pass/communication_op_fusion.h" -#include "pre_activate/pass/getitem_tuple.h" -#include "pre_activate/gpu/adam_weight_decay_fusion.h" -#include "pre_activate/gpu/adam_fusion.h" -#include "device/kernel_runtime_manager.h" +#include "backend/session/gpu_session.h" +#include "runtime/device/gpu/kernel_info_setter.h" +#include "runtime/device/gpu/gpu_kernel_build.h" +#include "runtime/device/gpu/gpu_kernel_runtime.h" +#include "runtime/device/gpu/gpu_stream_assign.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pass_manager.h" +#include "backend/optimizer/common/helper.h" +#include "backend/optimizer/pass/communication_op_fusion.h" +#include "backend/optimizer/pass/getitem_tuple.h" +#include "backend/optimizer/gpu/adam_weight_decay_fusion.h" +#include "backend/optimizer/gpu/adam_fusion.h" +#include 
"runtime/device/kernel_runtime_manager.h" #include "predict/predict.h" #include "common/utils.h" #include "common/trans.h" @@ -121,7 +121,7 @@ void GPUSession::LoadInputData(const std::shared_ptr &kernel_graph, if (input_node->isa() && AnfAlgo::OutputAddrExist(input_node, 0)) { auto pk_node = input_node->cast(); auto device_address = AnfAlgo::GetMutableOutputAddr(pk_node, 0); - auto tensor_address = tensor->device_address(); + auto tensor_address = std::dynamic_pointer_cast(tensor->device_address()); bool need_sync = false; if (ms_context->enable_pynative_infer()) { if (tensor_address == nullptr || tensor_address != device_address) { @@ -187,8 +187,7 @@ GraphId GPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList GetSummaryNodes(graph.get()); // Remove NoOp from execution graph opt::RemoveNopNode(graph.get()); - // Alloc memory, including static memory and dynamic memory - AllocateMemory(graph.get()); + // Set graph manager. MS_EXCEPTION_IF_NULL(context_); FuncGraphManagerPtr manager = MakeManager({graph}); context_->AddManager(manager); @@ -196,6 +195,8 @@ GraphId GPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList manager->AddFuncGraph(graph); graph->set_manager(manager); } + // Alloc memory, including static memory and dynamic memory + AllocateMemory(graph.get()); return graph_id; } diff --git a/mindspore/ccsrc/session/gpu_session.h b/mindspore/ccsrc/backend/session/gpu_session.h similarity index 95% rename from mindspore/ccsrc/session/gpu_session.h rename to mindspore/ccsrc/backend/session/gpu_session.h index 4e46c2138d0..7e07dfbcbdb 100644 --- a/mindspore/ccsrc/session/gpu_session.h +++ b/mindspore/ccsrc/backend/session/gpu_session.h @@ -18,9 +18,9 @@ #include #include -#include "session/session_basic.h" -#include "session/kernel_graph.h" -#include "session/session_factory.h" +#include "backend/session/session_basic.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/session_factory.h" using 
KernelGraph = mindspore::session::KernelGraph; namespace mindspore { diff --git a/mindspore/ccsrc/session/kernel_graph.cc b/mindspore/ccsrc/backend/session/kernel_graph.cc similarity index 95% rename from mindspore/ccsrc/session/kernel_graph.cc rename to mindspore/ccsrc/backend/session/kernel_graph.cc index 264e2c661b8..df810fe6efb 100644 --- a/mindspore/ccsrc/session/kernel_graph.cc +++ b/mindspore/ccsrc/backend/session/kernel_graph.cc @@ -13,18 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" #include #include #include #include -#include "operator/ops.h" -#include "ir/param_value_py.h" -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_info.h" -#include "kernel/kernel_build_info.h" -#include "device/kernel_runtime_manager.h" -#include "kernel/common_utils.h" +#include "frontend/operator/ops.h" +#include "ir/param_value.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "runtime/device/kernel_runtime_manager.h" +#include "backend/kernel_compiler/common_utils.h" namespace mindspore { namespace session { @@ -380,9 +380,7 @@ ParameterPtr KernelGraph::NewParameter(const ParameterPtr ¶meter) { new_parameter->set_abstract(parameter->abstract()); new_parameter->set_name(parameter->name()); if (AnfAlgo::IsParameterWeight(parameter)) { - auto param_value = std::dynamic_pointer_cast(parameter->default_param()); - auto param_value_new = std::make_shared(param_value->value()); - new_parameter->set_default_param(param_value_new); + new_parameter->set_default_param(parameter->default_param()); kernel_info->SetFeatureMapFlag(false); } else { kernel_info->SetFeatureMapFlag(true); @@ -618,8 +616,8 @@ void KernelGraph::UpdateControlDependRelations(const std::vector &de if (AnfAlgo::HasNodeAttr(kControlDependMode, 
cnode)) { depend_mode = AnfAlgo::GetNodeAttr(cnode, kControlDependMode); } - MS_LOG(INFO) << "Prior node[" << prior_node->DebugString() << "], depend node[" << depend_node->DebugString() - << "], depend_mode :" << depend_mode << "."; + MS_LOG(DEBUG) << "Prior node[" << prior_node->DebugString() << "], depend node[" << depend_node->DebugString() + << "], depend_mode :" << depend_mode << "."; if (prior_node->isa() && depend_mode == 1) { prior_nodes = GetOutputNodes(prior_node); } @@ -649,7 +647,8 @@ void KernelGraph::UpdateControlDependRelations(const std::vector &de } MS_EXCEPTION_IF_NULL(first_node); MS_EXCEPTION_IF_NULL(second_node); - MS_LOG(INFO) << "Add first node:" << first_node->DebugString() << ",second node:" << second_node->DebugString(); + MS_LOG(DEBUG) << "Add first node:" << first_node->DebugString() + << ",second node:" << second_node->DebugString(); AddDependEdge(second_node, first_node, 1); } } @@ -750,6 +749,10 @@ bool KernelGraph::RemoveValueNodeFromGraph(const ValueNodePtr &value_node) { void KernelGraph::ReplaceNode(NotNull old_anf_node, NotNull new_anf_node) { MS_EXCEPTION_IF_NULL(inputs_); + { + std::queue seed_nodes; + UpdateNodeEdgeList(&seed_nodes); + } auto it = node_output_edges_.find(old_anf_node); if (it != node_output_edges_.end()) { const auto &outputs = it->second; @@ -780,8 +783,10 @@ void KernelGraph::ReplaceNode(NotNull old_anf_node, NotNull seed_nodes; + UpdateNodeEdgeList(&seed_nodes); + } // update graph inputs in child graph auto it_real_inputs = std::find_if(real_inputs_.begin(), real_inputs_.end(), [&old_anf_node](const std::pair> &n) -> bool { @@ -987,6 +992,30 @@ bool KernelGraph::IsFinalOutputKernel(const AnfNodePtr &node) const { return false; } +void KernelGraph::UpdateChildGraphOrder() { + MS_LOG(INFO) << "Update " << ToString() << " child graph order."; + SetExecOrderByDefault(); + auto call_nodes = FindNodeByPrimitive(std::make_shared(prim::kPrimCall->name())); + std::vector child_graph_order; + for (auto &call_node : 
call_nodes) { + MS_EXCEPTION_IF_NULL(call_node); + auto call_child_graphs = AnfAlgo::GetCallNodeKernelGraph(call_node->cast()); + for (const auto &child_graph : call_child_graphs) { + MS_EXCEPTION_IF_NULL(child_graph); + if (child_graph != parent_graph_) { + auto shared_this = std::dynamic_pointer_cast(shared_from_this()); + MS_EXCEPTION_IF_NULL(shared_this); + child_graph->set_parent_graph(shared_this); + } + child_graph_order.push_back(child_graph); + } + } + for (size_t i = 0; i < child_graph_order.size(); ++i) { + MS_LOG(INFO) << "Child graph[" << i << "][id:" << child_graph_order[i]->graph_id() << "]"; + } + child_graph_order_ = child_graph_order; +} + std::string KernelGraph::ToString() const { return std::string("kernel_graph_").append(std::to_string(graph_id_)); } KernelGraph::~KernelGraph() { device::KernelRuntimeManager::Instance().ClearGraphResource(graph_id_); } diff --git a/mindspore/ccsrc/session/kernel_graph.h b/mindspore/ccsrc/backend/session/kernel_graph.h similarity index 94% rename from mindspore/ccsrc/session/kernel_graph.h rename to mindspore/ccsrc/backend/session/kernel_graph.h index 6861d43de0d..48df351120b 100644 --- a/mindspore/ccsrc/session/kernel_graph.h +++ b/mindspore/ccsrc/backend/session/kernel_graph.h @@ -29,14 +29,14 @@ #include "ir/anf.h" #include "utils/graph_utils.h" #include "utils/contract.h" -#include "device/kernel_info.h" +#include "runtime/device/kernel_info.h" namespace mindspore { namespace session { using AnfWithOutIndex = std::pair; class KernelGraph : public FuncGraph { public: - KernelGraph() : graph_id_(0), start_label_(nullptr), end_goto_(nullptr), null_output_(false) { + KernelGraph() : graph_id_(0), start_label_(nullptr), end_goto_(nullptr), null_output_(false), current_epoch_(0) { inputs_ = std::make_shared>(); execution_order_ = {}; executable_ = true; @@ -154,6 +154,14 @@ class KernelGraph : public FuncGraph { AnfNodePtr GetFrontNodeByInternalOutput(const AnfNodePtr &node) const; void AddFinalOutputKernel(const 
AnfNodePtr &node); bool IsFinalOutputKernel(const AnfNodePtr &node) const; + uint32_t current_epoch() const { return current_epoch_; } + void set_current_epoch(uint32_t epoch) { current_epoch_ = epoch; } + void UpdateChildGraphOrder(); + const std::vector &child_graph_result() const { return child_graph_result_; } + void AddChildGraphResult(const AnfNodePtr ¶meter) { child_graph_result_.push_back(parameter); } + void set_child_graph_result(const std::vector &child_graph_result) { + child_graph_result_ = child_graph_result; + } private: // remove value node form graph @@ -171,6 +179,7 @@ class KernelGraph : public FuncGraph { void UpdateControlDependRelations(const std::vector &depends); std::shared_ptr> inputs_; + std::vector child_graph_result_; std::vector execution_order_; uint32_t graph_id_; uint32_t stream_distinction_label_; @@ -216,6 +225,7 @@ class KernelGraph : public FuncGraph { std::unordered_map front_to_internal_outputs_map_; std::unordered_map internal_outputs_to_front_map_; std::set final_output_kernels_; + uint32_t current_epoch_; }; } // namespace session using KernelGraphPtr = std::shared_ptr; diff --git a/mindspore/ccsrc/session/session.cc b/mindspore/ccsrc/backend/session/session.cc similarity index 96% rename from mindspore/ccsrc/session/session.cc rename to mindspore/ccsrc/backend/session/session.cc index ae70fc77aa5..95484a11132 100644 --- a/mindspore/ccsrc/session/session.cc +++ b/mindspore/ccsrc/backend/session/session.cc @@ -17,17 +17,17 @@ #include #include #include "include/inference.h" -#include "session/session.h" +#include "backend/session/session.h" #include "utils/load_onnx/anf_converter.h" -#include "session/session_basic.h" -#include "session/session_factory.h" +#include "backend/session/session_basic.h" +#include "backend/session/session_factory.h" #include "utils/base_ref_utils.h" -#include "kernel/oplib/oplib.h" +#include "backend/kernel_compiler/oplib/oplib.h" #ifdef ENABLE_D #include "utils/context/ms_context.h" -#include 
"session/ascend_session.h" +#include "backend/session/ascend_session.h" #else -#include "session/cpu_session.h" +#include "backend/session/cpu_session.h" #endif namespace py = pybind11; diff --git a/mindspore/ccsrc/session/session.h b/mindspore/ccsrc/backend/session/session.h similarity index 97% rename from mindspore/ccsrc/session/session.h rename to mindspore/ccsrc/backend/session/session.h index b608163067d..6ea9cfaa474 100644 --- a/mindspore/ccsrc/session/session.h +++ b/mindspore/ccsrc/backend/session/session.h @@ -23,7 +23,7 @@ #include #include -#include "session/session_basic.h" +#include "backend/session/session_basic.h" #include "ir/anf.h" #include "include/inference.h" diff --git a/mindspore/ccsrc/session/session_basic.cc b/mindspore/ccsrc/backend/session/session_basic.cc similarity index 89% rename from mindspore/ccsrc/session/session_basic.cc rename to mindspore/ccsrc/backend/session/session_basic.cc index 91e430182ca..9755dfc7d0c 100644 --- a/mindspore/ccsrc/session/session_basic.cc +++ b/mindspore/ccsrc/backend/session/session_basic.cc @@ -13,24 +13,24 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "session/session_basic.h" +#include "backend/session/session_basic.h" #include #include #include #include -#include "pipeline/parse/data_converter.h" +#include "pipeline/jit/parse/data_converter.h" #include "ir/manager.h" -#include "ir/param_value_py.h" -#include "kernel/common_utils.h" -#include "operator/ops.h" +#include "ir/param_value.h" +#include "backend/kernel_compiler/common_utils.h" +#include "frontend/operator/ops.h" #include "common/trans.h" #include "utils/context/ms_context.h" #include "utils/config_manager.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/oplib/oplib.h" -#include "pre_activate/common/common_backend_optimization.h" -#include "pre_activate/pass/const_input_to_attr_registry.h" -#include "pre_activate/common/helper.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/optimizer/common/common_backend_optimization.h" +#include "backend/optimizer/pass/const_input_to_attr_registry.h" +#include "backend/optimizer/common/helper.h" #include "common/utils.h" #include "ir/dtype.h" #include "ir/anf.h" @@ -38,12 +38,12 @@ namespace mindspore { namespace session { -static std::shared_ptr> python_paras_; -void ClearPythonParasMap() { python_paras_ = nullptr; } +static std::shared_ptr> python_paras; +void ClearPythonParasMap() { python_paras = nullptr; } namespace { const int kSummaryGetItem = 2; -PyObject *GetParamDefaultInputTensor(const AnfNodePtr &node) { +ParamValuePtr GetParamDefaultValue(const AnfNodePtr &node) { if (node == nullptr) { return nullptr; } @@ -51,10 +51,7 @@ PyObject *GetParamDefaultInputTensor(const AnfNodePtr &node) { if (parameter == nullptr || !parameter->has_default()) { return nullptr; } - auto param_value = std::dynamic_pointer_cast(parameter->default_param()); - MS_EXCEPTION_IF_NULL(param_value); - auto py_param = param_value->value(); - return py_param.ptr(); + return parameter->default_param(); } BaseRef CreateOneTensor(const 
AnfNodePtr &node, size_t output_index, const KernelGraph &graph, @@ -77,7 +74,7 @@ BaseRef CreateOneTensor(const AnfNodePtr &node, size_t output_index, const Kerne return input_tensors[input_idx]; } } - MS_LOG(EXCEPTION) << "Parameter : " << node->DebugString() << "has no output addr"; + MS_LOG(EXCEPTION) << "Parameter : " << node->DebugString() << " has no output addr"; } } // if proccess reach here,it remarks item_with_index is a real node(Parameter,or executable CNode) @@ -110,8 +107,8 @@ BaseRef CreateOneTensor(const AnfNodePtr &node, size_t output_index, const Kerne return tensor; } -BaseRef CreatTensorForOutput(const AnfNodePtr &anf, const KernelGraph &graph, - const std::vector &input_tensors) { +BaseRef CreateTensorForOutput(const AnfNodePtr &anf, const KernelGraph &graph, + const std::vector &input_tensors) { MS_EXCEPTION_IF_NULL(anf); MS_LOG(INFO) << "Create tensor for output[" << anf->DebugString() << "]"; auto item_with_index = AnfAlgo::VisitKernelWithReturnType(anf, 0); @@ -123,7 +120,7 @@ BaseRef CreatTensorForOutput(const AnfNodePtr &anf, const KernelGraph &graph, MS_EXCEPTION_IF_NULL(cnode); VectorRef ret; for (size_t i = 1; i < cnode->inputs().size(); ++i) { - auto out = CreatTensorForOutput(cnode->input(i), graph, input_tensors); + auto out = CreateTensorForOutput(cnode->input(i), graph, input_tensors); ret.push_back(out); } return ret; @@ -136,25 +133,6 @@ BaseRef CreatTensorForOutput(const AnfNodePtr &anf, const KernelGraph &graph, return CreateOneTensor(item_with_index.first, item_with_index.second, graph, input_tensors); } -BaseRef CreatTupleForOutput(const AnfNodePtr &anf, const KernelGraph &graph, - const std::vector &input_tensors) { - MS_EXCEPTION_IF_NULL(anf); - if (!AnfAlgo::IsRealKernel(anf)) { - MS_LOG(EXCEPTION) << "Anf[" << anf->DebugString() << "] should be a executable kernel"; - } - if (anf->isa()) { - return CreateOneTensor(anf, 0, graph, input_tensors); - } - VectorRef ret; - if (anf->isa() && AnfAlgo::GetCNodeName(anf) != 
prim::kPrimMakeTuple->name()) { - for (size_t i = 0; i < AnfAlgo::GetOutputTensorNum(anf); ++i) { - auto out = CreateOneTensor(anf, i, graph, input_tensors); - ret.emplace_back(out); - } - } - return ret; -} - ValueNodePtr CreateNewValueNode(const AnfNodePtr &anf, KernelGraph *graph) { MS_EXCEPTION_IF_NULL(anf); MS_EXCEPTION_IF_NULL(graph); @@ -178,8 +156,8 @@ size_t LoadCtrlInputTensor(const std::shared_ptr &graph, std::vecto if (inputs_params == nullptr) { return 0; } - if (inputs_params->empty()) { - MS_LOG(EXCEPTION) << "Illegal empty inputs_params"; + if (inputs_params->size() < 2) { + MS_LOG(EXCEPTION) << "Illegal inputs_params size"; } auto tensor = (*inputs_params)[0]; MS_EXCEPTION_IF_NULL(tensor); @@ -190,6 +168,18 @@ size_t LoadCtrlInputTensor(const std::shared_ptr &graph, std::vecto // set loop_count to zero MS_EXCEPTION_IF_NULL(inputs); inputs->push_back(tensor); + + auto epoch_tensor = (*inputs_params)[1]; + MS_EXCEPTION_IF_NULL(epoch_tensor); + auto *epoch_val = static_cast(epoch_tensor->data_c()); + MS_EXCEPTION_IF_NULL(epoch_val); + *epoch_val = graph->current_epoch(); + epoch_tensor->set_dirty(true); + inputs->push_back(epoch_tensor); + MS_LOG(INFO) << "Load epoch_val:" << *epoch_val; + + graph->set_current_epoch(graph->current_epoch() + 1); + return inputs_params->size(); } @@ -215,20 +205,20 @@ ParameterPtr ConstructRunOpParameter(const std::shared_ptr &graph, auto param = graph->NewParameter(); MS_EXCEPTION_IF_NULL(param); if (tensor_mask == kParameterWeightTensorMask) { - py::object obj; - auto param_value_new = std::make_shared(obj); + auto param_value_new = std::make_shared(); param->set_default_param(param_value_new); } // set the kernel info of parameter auto kernel_build_info_builder = std::make_shared(); MS_EXCEPTION_IF_NULL(input_tensor); - if (input_tensor->device_address().get() == nullptr) { + auto device_address = std::dynamic_pointer_cast(input_tensor->device_address()); + if (device_address == nullptr) { 
kernel_build_info_builder->SetOutputsFormat(std::vector{kOpFormat_DEFAULT}); TypeId param_init_data_type = AnfAlgo::IsParameterWeight(param) ? kTypeUnknown : input_tensor->data_type(); kernel_build_info_builder->SetOutputsDeviceType(std::vector{param_init_data_type}); } else { - kernel_build_info_builder->SetOutputsFormat(std::vector{input_tensor->device_address()->format()}); - kernel_build_info_builder->SetOutputsDeviceType(std::vector{input_tensor->device_address()->type_id()}); + kernel_build_info_builder->SetOutputsFormat(std::vector{device_address->format()}); + kernel_build_info_builder->SetOutputsDeviceType(std::vector{device_address->type_id()}); } AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_builder->Build(), param.get()); // construct abstract of parameter @@ -311,7 +301,7 @@ void SessionBasic::InitInternalOutputParameter(const AnfNodePtr &out_node, const if (ref_real_node->isa() && node_graph->IsInternalOutput(ref_real_node) && node_graph->IsFinalOutputKernel(ref_real_node)) { auto kernel_info = ref_real_node->kernel_info(); - if (kernel_info == nullptr || kernel_info->select_kernel_build_info() == nullptr) { + if (kernel_info == nullptr || !kernel_info->has_build_info()) { MS_LOG(INFO) << "No kernel info"; return; } @@ -322,9 +312,9 @@ void SessionBasic::InitInternalOutputParameter(const AnfNodePtr &out_node, const } auto format = AnfAlgo::GetOutputFormat(ref_real_node, ref_real_node_index); auto type = AnfAlgo::GetOutputDeviceDataType(ref_real_node, ref_real_node_index); - parameter->set_kernel_info(std::make_shared()); - auto d_kernel_info = parameter->kernel_info(); + auto d_kernel_info = std::make_shared(); MS_EXCEPTION_IF_NULL(d_kernel_info); + parameter->set_kernel_info(d_kernel_info); kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; builder.SetOutputsDeviceType({type}); builder.SetOutputsFormat({format}); @@ -384,24 +374,24 @@ ParameterPtr SessionBasic::CreateNewParameterFromParameter(const AnfNodePtr &anf MS_LOG(EXCEPTION) << 
"Anf[" << anf->DebugString() << "] is not a parameter"; } MS_EXCEPTION_IF_NULL(graph); - auto m_tensor = GetParamDefaultInputTensor(anf); + auto param_value = GetParamDefaultValue(anf); auto valid_inputs = graph->MutableValidInputs(); MS_EXCEPTION_IF_NULL(valid_inputs); auto graph_inputs = graph->MutableInputs(); MS_EXCEPTION_IF_NULL(graph_inputs); ParameterPtr new_parameter = nullptr; // if parameter's python parameter has been exist a backend parameter, reuse the exist parameter - if (python_paras_ == nullptr) { - python_paras_ = std::make_shared>(); + if (python_paras == nullptr) { + python_paras = std::make_shared>(); } - auto iter = python_paras_->find(m_tensor); - if (iter != python_paras_->end()) { + auto iter = python_paras->find(param_value); + if (iter != python_paras->end()) { new_parameter = iter->second; } else { TraceManager::DebugTrace(std::make_shared(anf->debug_info())); new_parameter = graph->NewParameter(anf->cast()); - if (m_tensor != nullptr) { - (*python_paras_)[m_tensor] = new_parameter; + if (param_value != nullptr) { + (*python_paras)[param_value] = new_parameter; } TraceManager::EndTrace(); } @@ -485,15 +475,13 @@ CNodePtr SessionBasic::CreateNewCNode(const CNodePtr &cnode, bool valid_input, K } else if (optimize_depend && input_idx == kDependAttachNodeIndex) { cnode_inputs.push_back(origin_inputs[kRealInputIndexInDepend]); continue; - } else if (anf->isa()) { + } else { *from_other_graph = true; // the input node is a cnode from other graph auto parameter_from_cnode = CreateNewParameterFromCNode(anf, valid_input, graph); cnode_inputs.push_back(parameter_from_cnode); (*other_graph_cnode)[anf] = parameter_from_cnode; - continue; } - MS_LOG(EXCEPTION) << "Unexpected input[" << anf->DebugString() << "]"; } TraceManager::DebugTrace(std::make_shared(cnode->debug_info())); auto new_cnode = graph->NewCNode(cnode_inputs); @@ -501,7 +489,50 @@ CNodePtr SessionBasic::CreateNewCNode(const CNodePtr &cnode, bool valid_input, K return new_cnode; } 
-static std::vector CreateSwitchOrPartialNode(const CNodePtr &cnode, KernelGraph *graph) { +CNodePtr SessionBasic::CreateSwitchInput(const AnfNodePtr &node_input, KernelGraph *graph) { + MS_EXCEPTION_IF_NULL(node_input); + MS_EXCEPTION_IF_NULL(graph); + // switch input generalizes partial + if (AnfAlgo::CheckPrimitiveType(node_input, prim::kPrimPartial) || + AnfAlgo::CheckPrimitiveType(node_input, prim::kPrimCall)) { + return node_input->cast(); + } + if (node_input->isa()) { + MS_LOG(EXCEPTION) << "If switch input is " << node_input->DebugString() << ", it mast be partial or call."; + } + std::vector partial_inputs = {NewValueNode(std::make_shared(prim::kPrimPartial->name()))}; + if (node_input->isa() && IsValueNode(node_input)) { + partial_inputs.emplace_back(node_input); + auto partial_node = graph->NewCNode(partial_inputs); + return partial_node; + } + KernelGraphPtr kernel_graph = NewKernelGraph(); + MS_EXCEPTION_IF_NULL(kernel_graph); + kernel_graph->set_output(graph->GetBackendAnfByFrontAnf(node_input)); + partial_inputs.emplace_back(std::make_shared(kernel_graph)); + auto partial_node = graph->NewCNode(partial_inputs); + return partial_node; +} + +CNodePtr SessionBasic::HandleSwitchInputs(const AnfNodePtr &anf_node, KernelGraph *graph) { + MS_EXCEPTION_IF_NULL(anf_node); + MS_EXCEPTION_IF_NULL(graph); + auto node = anf_node->cast(); + MS_EXCEPTION_IF_NULL(node); + if (node->inputs().size() < kSwitchInputSize) { + MS_LOG(EXCEPTION) << "Switch input size less than " << kSwitchInputSize; + } + auto primitive = NewValueNode(std::make_shared(prim::kPrimSwitch->name())); + std::vector switch_inputs = {primitive, node->input(1)}; + for (size_t index = 2; index < node->inputs().size(); index++) { + auto input = CreateSwitchInput(node->input(index), graph); + switch_inputs.emplace_back(input); + } + auto switch_node = graph->NewCNode(switch_inputs); + return switch_node; +} + +std::vector SessionBasic::CreateSwitchOrPartialNode(const CNodePtr &cnode, KernelGraph 
*graph) { MS_EXCEPTION_IF_NULL(cnode); MS_EXCEPTION_IF_NULL(graph); // create primitive of cnode:call(partial or switch) @@ -526,7 +557,8 @@ static std::vector CreateSwitchOrPartialNode(const CNodePtr &cnode, }); return cnode_inputs; } else if (AnfAlgo::CheckPrimitiveType(cnode_input, prim::kPrimSwitch)) { - cnode_inputs.emplace_back(cnode_input); + auto switch_node = HandleSwitchInputs(cnode_input, graph); + cnode_inputs.emplace_back(switch_node); return cnode_inputs; } MS_LOG(EXCEPTION) << "CNode input[0] must be partial or switch."; @@ -618,19 +650,19 @@ ParameterPtr SessionBasic::CreateNewParameter(const AnfNodePtr &anf, KernelGraph MS_LOG(EXCEPTION) << "Anf[" << anf->DebugString() << "] is not a parameter"; } - auto m_tensor = GetParamDefaultInputTensor(anf); + auto param_value = GetParamDefaultValue(anf); ParameterPtr new_parameter = nullptr; - if (python_paras_ == nullptr) { - python_paras_ = std::make_shared>(); + if (python_paras == nullptr) { + python_paras = std::make_shared>(); } - auto iter = python_paras_->find(m_tensor); - if (iter != python_paras_->end()) { + auto iter = python_paras->find(param_value); + if (iter != python_paras->end()) { new_parameter = iter->second; } else { TraceManager::DebugTrace(std::make_shared(anf->debug_info())); new_parameter = graph->NewParameter(anf->cast()); - if (m_tensor != nullptr) { - (*python_paras_)[m_tensor] = new_parameter; + if (param_value != nullptr) { + (*python_paras)[param_value] = new_parameter; } TraceManager::EndTrace(); } @@ -776,13 +808,13 @@ void SessionBasic::AddParameterToGraphInputs(const std::vector ¶ void SessionBasic::LoadInputData(const std::shared_ptr &kernel_graph, const std::vector &inputs_const) const { std::vector inputs(inputs_const); - size_t input_ctrl_size = 1; + size_t input_ctrl_size = 2; MS_EXCEPTION_IF_NULL(kernel_graph); if (kernel_graph->input_ctrl_tensors()) { input_ctrl_size = LoadCtrlInputTensor(kernel_graph, &inputs); } auto input_nodes = kernel_graph->inputs(); - if 
((inputs.size() + input_ctrl_size) - 1 != input_nodes.size()) { + if ((inputs.size() + input_ctrl_size) - 2 != input_nodes.size()) { MS_LOG(EXCEPTION) << "Tensor input:" << inputs.size() << " is not equal graph inputs:" << input_nodes.size() << ", input_ctrl_size:" << input_ctrl_size; } @@ -829,20 +861,11 @@ void SessionBasic::UpdateOutputs(const std::shared_ptr &kernel_grap const std::vector &input_tensors) const { MS_EXCEPTION_IF_NULL(kernel_graph); MS_EXCEPTION_IF_NULL(outputs); - if (!kernel_graph->child_graph_order().empty()) { - // use the last child graph output as the root graph output - UpdateOutputs(kernel_graph->child_graph_order().back(), outputs, input_tensors); - return; - } auto anf_outputs = kernel_graph->outputs(); for (auto &item : anf_outputs) { MS_EXCEPTION_IF_NULL(item); MS_LOG(INFO) << "Update output[" << item->DebugString() << "]"; - if (AnfAlgo::IsTupleOutput(item) && AnfAlgo::IsRealKernel(item)) { - outputs->emplace_back(CreatTupleForOutput(item, *kernel_graph, input_tensors)); - continue; - } - outputs->emplace_back(CreatTensorForOutput(item, *kernel_graph, input_tensors)); + outputs->emplace_back(CreateTensorForOutput(item, *kernel_graph, input_tensors)); } } @@ -931,6 +954,11 @@ CNodePtr SessionBasic::ConstructOutput(const AnfNodePtrList &outputs, const std: auto FindEqu = [graph, outputs](const AnfNodePtr &out) -> AnfNodePtr { auto backend_anf = graph->GetBackendAnfByFrontAnf(out); if (backend_anf != nullptr) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if (context_ptr->execution_mode() == kPynativeMode) { + return backend_anf; + } auto front_real_kernel = AnfAlgo::VisitKernel(out, 0); auto backend_real_kernel = AnfAlgo::VisitKernel(backend_anf, 0); MS_EXCEPTION_IF_NULL(out); diff --git a/mindspore/ccsrc/session/session_basic.h b/mindspore/ccsrc/backend/session/session_basic.h similarity index 94% rename from mindspore/ccsrc/session/session_basic.h rename to 
mindspore/ccsrc/backend/session/session_basic.h index cf85dd02250..c662e3978bd 100755 --- a/mindspore/ccsrc/session/session_basic.h +++ b/mindspore/ccsrc/backend/session/session_basic.h @@ -24,14 +24,14 @@ #include #include "utils/base_ref_extends.h" -#include "session/session_context.h" -#include "session/kernel_graph.h" +#include "backend/session/session_context.h" +#include "backend/session/kernel_graph.h" #include "ir/anf.h" #include "ir/tensor.h" #include "utils/any.h" #include "utils/contract.h" -#include "pynative/pynative_execute.h" -#include "device/kernel_info.h" +#include "pipeline/pynative/pynative_execute.h" +#include "runtime/device/kernel_info.h" #ifdef ENABLE_DEBUGGER #include "debug/debugger/debugger.h" #endif @@ -87,6 +87,10 @@ class SessionBasic { std::unordered_map *other_graph_cnode); CNodePtr CreateNewCNode(const CNodePtr &cnode, KernelGraph *graph); + CNodePtr CreateSwitchInput(const AnfNodePtr &node_input, KernelGraph *graph); + CNodePtr HandleSwitchInputs(const AnfNodePtr &anf_node, KernelGraph *graph); + std::vector CreateSwitchOrPartialNode(const CNodePtr &cnode, KernelGraph *graph); + // set parameters of final graph virtual GraphId SetFinalGraphInput(const std::vector &) { return kInvalidGraphId; } // set output of final graph diff --git a/mindspore/ccsrc/session/session_context.cc b/mindspore/ccsrc/backend/session/session_context.cc similarity index 95% rename from mindspore/ccsrc/session/session_context.cc rename to mindspore/ccsrc/backend/session/session_context.cc index 2b6ebf6b84f..f5ec49c090b 100644 --- a/mindspore/ccsrc/session/session_context.cc +++ b/mindspore/ccsrc/backend/session/session_context.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "session/session_context.h" +#include "backend/session/session_context.h" namespace mindspore { namespace session { std::shared_ptr Context::GetInstance() { diff --git a/mindspore/ccsrc/session/session_context.h b/mindspore/ccsrc/backend/session/session_context.h similarity index 97% rename from mindspore/ccsrc/session/session_context.h rename to mindspore/ccsrc/backend/session/session_context.h index 78794c348e7..22cc0c813a6 100644 --- a/mindspore/ccsrc/session/session_context.h +++ b/mindspore/ccsrc/backend/session/session_context.h @@ -23,7 +23,7 @@ #include #include "ir/tensor.h" -#include "pipeline/resource.h" +#include "pipeline/jit/resource.h" #include "utils/context/ms_context.h" namespace mindspore { namespace session { diff --git a/mindspore/ccsrc/session/session_factory.cc b/mindspore/ccsrc/backend/session/session_factory.cc similarity index 96% rename from mindspore/ccsrc/session/session_factory.cc rename to mindspore/ccsrc/backend/session/session_factory.cc index 4cd0481f8cb..8a8f9a9cea1 100644 --- a/mindspore/ccsrc/session/session_factory.cc +++ b/mindspore/ccsrc/backend/session/session_factory.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "session/session_factory.h" +#include "backend/session/session_factory.h" #include #include #include diff --git a/mindspore/ccsrc/session/session_factory.h b/mindspore/ccsrc/backend/session/session_factory.h similarity index 97% rename from mindspore/ccsrc/session/session_factory.h rename to mindspore/ccsrc/backend/session/session_factory.h index 99db0afeb7f..054f03cf4b4 100644 --- a/mindspore/ccsrc/session/session_factory.h +++ b/mindspore/ccsrc/backend/session/session_factory.h @@ -22,7 +22,7 @@ #include #include #include "common/utils.h" -#include "session/session_basic.h" +#include "backend/session/session_basic.h" namespace mindspore { namespace session { using SessionCreator = std::function()>; diff --git a/mindspore/ccsrc/common.h b/mindspore/ccsrc/common.h index 0928dcfcf68..6b882a15d41 100644 --- a/mindspore/ccsrc/common.h +++ b/mindspore/ccsrc/common.h @@ -23,13 +23,13 @@ #include "pybind11/pybind11.h" #include "pybind11/stl.h" -#include "pipeline/static_analysis/dshape.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pipeline/static_analysis/abstract_function.h" -#include "pipeline/parse/python_adapter.h" -#include "pipeline/parse/parse.h" -#include "pipeline/parse/parse_base.h" -#include "pipeline/parse/resolve.h" +#include "abstract/dshape.h" +#include "abstract/abstract_value.h" +#include "pipeline/jit/static_analysis/abstract_function.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "pipeline/jit/parse/parse.h" +#include "pipeline/jit/parse/parse_base.h" +#include "pipeline/jit/parse/resolve.h" namespace py = pybind11; #endif // MINDSPORE_CCSRC_COMMON_H_ diff --git a/mindspore/ccsrc/common/trans.cc b/mindspore/ccsrc/common/trans.cc index 9cf6eb3a5a2..1841826ca96 100644 --- a/mindspore/ccsrc/common/trans.cc +++ b/mindspore/ccsrc/common/trans.cc @@ -18,9 +18,9 @@ #include #include #include "common/utils.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/kernel.h" -#include 
"device/convert_tensor_utils.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/kernel.h" +#include "runtime/device/convert_tensor_utils.h" #include "utils/convert_utils.h" #include "utils/log_adapter.h" #include "utils/utils.h" diff --git a/mindspore/ccsrc/common/trans.h b/mindspore/ccsrc/common/trans.h index a8fc7c8a000..286c76afd0e 100644 --- a/mindspore/ccsrc/common/trans.h +++ b/mindspore/ccsrc/common/trans.h @@ -24,7 +24,7 @@ #include #include #include "ir/dtype.h" -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" #include "ir/dtype/type.h" namespace mindspore { diff --git a/mindspore/ccsrc/common/utils.h b/mindspore/ccsrc/common/utils.h index 8f6e8f7c0c4..23d08f8f281 100644 --- a/mindspore/ccsrc/common/utils.h +++ b/mindspore/ccsrc/common/utils.h @@ -38,6 +38,14 @@ static inline std::string GetEnv(const std::string &envvar) { return std::string(value); } + +static inline int SetEnv(const char *envname, const char *envvar, int overwrite = 1) { +#if defined(_WIN32) + return 0; +#else + return ::setenv(envname, envvar, overwrite); +#endif +} } // namespace common } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/core/client.h b/mindspore/ccsrc/dataset/core/client.h deleted file mode 100644 index a10cb4596ea..00000000000 --- a/mindspore/ccsrc/dataset/core/client.h +++ /dev/null @@ -1,57 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef DATASET_CORE_CLIENT_H_ -#define DATASET_CORE_CLIENT_H_ - -// client.h -// Include file for DE client functions - -#include "dataset/core/constants.h" -#include "dataset/core/data_type.h" -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/datasetops/barrier_op.h" -#include "dataset/engine/datasetops/batch_op.h" -#include "dataset/engine/datasetops/build_vocab_op.h" -#include "dataset/engine/datasetops/dataset_op.h" -#include "dataset/engine/datasetops/device_queue_op.h" -#include "dataset/engine/datasetops/map_op.h" -#include "dataset/engine/datasetops/project_op.h" -#include "dataset/engine/datasetops/rename_op.h" -#include "dataset/engine/datasetops/filter_op.h" -#include "dataset/engine/datasetops/repeat_op.h" -#include "dataset/engine/datasetops/skip_op.h" -#include "dataset/engine/datasetops/shuffle_op.h" -#include "dataset/engine/datasetops/source/generator_op.h" -#include "dataset/engine/datasetops/source/mindrecord_op.h" -#include "dataset/engine/datasetops/source/tf_reader_op.h" -#include "dataset/engine/datasetops/take_op.h" -#include "dataset/engine/datasetops/zip_op.h" -#include "dataset/engine/datasetops/concat_op.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/util/status.h" - -namespace mindspore { -namespace dataset { -// This is a one-time global initializer that needs to be called at the -// start of any minddata applications. 
-extern Status GlobalInit(); -} // namespace dataset -} // namespace mindspore - -#endif // DATASET_CORE_CLIENT_H_ diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt b/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt deleted file mode 100644 index b78ddcd87bc..00000000000 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt +++ /dev/null @@ -1,19 +0,0 @@ -add_subdirectory(sampler) -file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") -set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) -add_library(engine-datasetops-source OBJECT - generator_op.cc - io_block.cc - mindrecord_op.cc - tf_reader_op.cc - image_folder_op.cc - mnist_op.cc - voc_op.cc - coco_op.cc - manifest_op.cc - cifar_op.cc - random_data_op.cc - celeba_op.cc - text_file_op.cc - clue_op.cc - ) \ No newline at end of file diff --git a/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.cc b/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.cc deleted file mode 100644 index 04e00d878d1..00000000000 --- a/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.cc +++ /dev/null @@ -1,78 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include "dataset/kernels/image/bounding_box_augment_op.h" -#include "dataset/kernels/image/resize_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/core/cv_tensor.h" - -namespace mindspore { -namespace dataset { -const float BoundingBoxAugmentOp::kDefRatio = 0.3; - -BoundingBoxAugmentOp::BoundingBoxAugmentOp(std::shared_ptr transform, float ratio) - : ratio_(ratio), transform_(std::move(transform)) { - rnd_.seed(GetSeed()); -} - -Status BoundingBoxAugmentOp::Compute(const TensorRow &input, TensorRow *output) { - IO_CHECK_VECTOR(input, output); - BOUNDING_BOX_CHECK(input); // check if bounding boxes are valid - uint32_t num_of_boxes = input[1]->shape()[0]; - uint32_t num_to_aug = num_of_boxes * ratio_; // cast to int - std::vector boxes(num_of_boxes); - std::vector selected_boxes; - for (uint32_t i = 0; i < num_of_boxes; i++) boxes[i] = i; - // sample bboxes according to ratio picked by user - std::sample(boxes.begin(), boxes.end(), std::back_inserter(selected_boxes), num_to_aug, rnd_); - std::shared_ptr crop_out; - std::shared_ptr res_out; - std::shared_ptr input_restore = CVTensor::AsCVTensor(input[0]); - - for (uint32_t i = 0; i < num_to_aug; i++) { - uint32_t min_x = 0; - uint32_t min_y = 0; - uint32_t b_w = 0; - uint32_t b_h = 0; - // get the required items - input[1]->GetItemAt(&min_x, {selected_boxes[i], 0}); - input[1]->GetItemAt(&min_y, {selected_boxes[i], 1}); - input[1]->GetItemAt(&b_w, {selected_boxes[i], 2}); - input[1]->GetItemAt(&b_h, {selected_boxes[i], 3}); - Crop(input_restore, &crop_out, min_x, min_y, b_w, b_h); - // transform the cropped bbox region - transform_->Compute(crop_out, &res_out); - // place the transformed region back in the restored input - std::shared_ptr res_img = CVTensor::AsCVTensor(res_out); - // check if transformed crop is out of bounds of the box - if (res_img->mat().cols > b_w || res_img->mat().rows > b_h || res_img->mat().cols < b_w || - res_img->mat().rows < b_h) { - // 
if so, resize to fit in the box - std::shared_ptr resize_op = std::make_shared(b_h, b_w); - resize_op->Compute(std::static_pointer_cast(res_img), &res_out); - res_img = CVTensor::AsCVTensor(res_out); - } - res_img->mat().copyTo(input_restore->mat()(cv::Rect(min_x, min_y, res_img->mat().cols, res_img->mat().rows))); - } - (*output).push_back(std::move(std::static_pointer_cast(input_restore))); - (*output).push_back(input[1]); - return Status::OK(); -} - -} // namespace dataset -} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/jieba_tokenizer_op.cc b/mindspore/ccsrc/dataset/text/kernels/jieba_tokenizer_op.cc deleted file mode 100644 index de1d915fbb4..00000000000 --- a/mindspore/ccsrc/dataset/text/kernels/jieba_tokenizer_op.cc +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "dataset/text/kernels/jieba_tokenizer_op.h" - -#include -#include -#include -#include "dataset/util/path.h" - -namespace mindspore { -namespace dataset { - -JiebaTokenizerOp::JiebaTokenizerOp(const std::string &hmm_path, const std::string &dict_path, JiebaMode mode) - : jieba_mode_(mode), hmm_model_path_(hmm_path), mp_dict_path_(dict_path) { - jieba_parser_ = std::make_unique(mp_dict_path_, hmm_model_path_, ""); -} - -Status JiebaTokenizerOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { - IO_CHECK(input, output); - RETURN_UNEXPECTED_IF_NULL(jieba_parser_); - - if (input->Rank() != 0 || input->type() != DataType::DE_STRING) { - RETURN_STATUS_UNEXPECTED("the input tensor should be scalar string tensor"); - } - - std::string_view sentence_v; - RETURN_IF_NOT_OK(input->GetItemAt(&sentence_v, {})); - std::string sentence{sentence_v}; - std::vector words; - if (sentence == "") { - words.push_back(""); - } else { - if (jieba_mode_ == JiebaMode::kMp) { - jieba_parser_->CutSmall(sentence, words, MAX_WORD_LENGTH); - } else if (jieba_mode_ == JiebaMode::kHmm) { - jieba_parser_->CutHMM(sentence, words); - } else { // Mix - jieba_parser_->Cut(sentence, words, true); - } - } - *output = std::make_shared(words, TensorShape({(dsize_t)words.size()})); - return Status::OK(); -} - -Status JiebaTokenizerOp::AddWord(const std::string &word, int freq) { - RETURN_UNEXPECTED_IF_NULL(jieba_parser_); - if (jieba_parser_->InsertUserWord(word, freq, "") == false) { - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "add word error"); - } - return Status::OK(); -} -} // namespace dataset -} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/unicode_char_tokenizer_op.cc b/mindspore/ccsrc/dataset/text/kernels/unicode_char_tokenizer_op.cc deleted file mode 100644 index 063bf216308..00000000000 --- a/mindspore/ccsrc/dataset/text/kernels/unicode_char_tokenizer_op.cc +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Copyright 2020 Huawei 
Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "dataset/text/kernels/unicode_char_tokenizer_op.h" -#include -#include -#include -#include - -#include "cppjieba/Unicode.hpp" - -using cppjieba::DecodeRunesInString; -using cppjieba::RuneStrArray; - -namespace mindspore { -namespace dataset { - -Status UnicodeCharTokenizerOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { - IO_CHECK(input, output); - if (input->Rank() != 0 || input->type() != DataType::DE_STRING) { - RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor"); - } - std::string_view str; - RETURN_IF_NOT_OK(input->GetItemAt(&str, {})); - - RuneStrArray runes; - if (!DecodeRunesInString(str.data(), str.size(), runes)) { - RETURN_STATUS_UNEXPECTED("Decode utf8 string failed."); - } - std::vector splits(runes.size()); - for (size_t i = 0; i < runes.size(); i++) { - splits[i] = str.substr(runes[i].offset, runes[i].len); - } - if (splits.empty()) { - splits.emplace_back(""); - } - *output = std::make_shared(splits, TensorShape({(dsize_t)splits.size()})); - return Status::OK(); -} -} // namespace dataset -} // namespace mindspore diff --git a/mindspore/ccsrc/debug/CMakeLists.txt b/mindspore/ccsrc/debug/CMakeLists.txt index ba0c5e07ac2..37ffcceeaf5 100644 --- a/mindspore/ccsrc/debug/CMakeLists.txt +++ b/mindspore/ccsrc/debug/CMakeLists.txt @@ -19,6 +19,15 @@ if (ENABLE_DEBUGGER) ) endif (ENABLE_DEBUGGER) +if (ENABLE_D) + 
list(APPEND _DEBUG_SRC_LIST + "${CMAKE_CURRENT_SOURCE_DIR}/common.cc" + ) + if (ENABLE_DATA_DUMP) + list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/data_dump_parser.cc") + endif(ENABLE_DATA_DUMP) +endif() + if (ENABLE_DUMP_E2E) list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/e2e_dump.cc") endif (ENABLE_DUMP_E2E) diff --git a/mindspore/ccsrc/debug/anf_ir_dump.cc b/mindspore/ccsrc/debug/anf_ir_dump.cc index fc32e0fb5fc..42d372cefb2 100644 --- a/mindspore/ccsrc/debug/anf_ir_dump.cc +++ b/mindspore/ccsrc/debug/anf_ir_dump.cc @@ -24,9 +24,9 @@ #include "ir/primitive.h" #include "ir/func_graph.h" -#include "device/kernel_info.h" +#include "runtime/device/kernel_info.h" #include "utils/graph_utils.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { const std::string ToShortString(const TypeId &typeId) { @@ -128,7 +128,7 @@ void DumpKernelInfo(const CNodePtr &node, const std::shared_ptr return; } auto kernel_info = node->kernel_info(); - if (kernel_info == nullptr || kernel_info->select_kernel_build_info() == nullptr) { + if (kernel_info == nullptr || !kernel_info->has_build_info()) { return; } @@ -179,7 +179,7 @@ void DumpParams(const FuncGraphPtr &graph, std::ostringstream &buffer, OrderedMa // print parameters' type and shape PrintNodeOutputType(buffer, p); auto kernel_info = p->kernel_info(); - if (kernel_info != nullptr && kernel_info->select_kernel_build_info() != nullptr) { + if (kernel_info != nullptr && kernel_info->has_build_info()) { buffer << " : "; auto type = AnfAlgo::GetOutputDeviceDataType(p, 0); auto format = AnfAlgo::GetOutputFormat(p, 0); diff --git a/mindspore/ccsrc/debug/anf_ir_utils.cc b/mindspore/ccsrc/debug/anf_ir_utils.cc index c797b8efea3..273a6f64581 100644 --- a/mindspore/ccsrc/debug/anf_ir_utils.cc +++ b/mindspore/ccsrc/debug/anf_ir_utils.cc @@ -26,19 +26,19 @@ #include "utils/graph_utils.h" #include "utils/symbolic.h" #include "ir/meta_func_graph.h" 
-#include "ir/param_value_py.h" +#include "ir/param_value.h" #include "ir/tensor_py.h" -#include "pipeline/parse/python_adapter.h" -#include "pipeline/parse/resolve.h" -#include "operator/composite/composite.h" -#include "operator/composite/map.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "pipeline/jit/parse/resolve.h" +#include "frontend/operator/composite/composite.h" +#include "frontend/operator/composite/map.h" #include "utils/ordered_map.h" #include "utils/ordered_set.h" #include "utils/utils.h" #include "debug/trace.h" #include "debug/label.h" #include "utils/context/ms_context.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" using mindspore::tensor::TensorPy; @@ -485,8 +485,8 @@ void AnfExporter::OutputParameters(std::ofstream &ofs, const std::vectorhas_default()) { - auto param_value = std::dynamic_pointer_cast(param_ptr->default_param()); - ofs << " = @" << DumpObject(param_value->value(), "D"); + auto param_value = param_ptr->default_param(); + ofs << " = @" << DumpObject(py::cast(param_value), "D"); } // output comment @@ -1667,7 +1667,7 @@ class IrParser { // load parameter default value from serialized file py::object default_obj = LoadObject(lexer_.GetTokenText()); - auto param_value_new = std::make_shared(default_obj); + auto param_value_new = py::cast(default_obj); param->set_default_param(param_value_new); tok = lexer_.GetNextToken(); diff --git a/mindspore/ccsrc/debug/anf_ir_utils.h b/mindspore/ccsrc/debug/anf_ir_utils.h index 4503692eb96..ed5e3b8a5d6 100644 --- a/mindspore/ccsrc/debug/anf_ir_utils.h +++ b/mindspore/ccsrc/debug/anf_ir_utils.h @@ -28,9 +28,9 @@ #include "ir/anf.h" #include "ir/func_graph.h" #include "ir/meta_func_graph.h" -#include "pipeline/parse/python_adapter.h" -#include "pipeline/parse/resolve.h" -#include "operator/composite/composite.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "pipeline/jit/parse/resolve.h" +#include "frontend/operator/composite/composite.h" #include 
"utils/symbolic.h" #include "utils/ordered_map.h" #include "utils/ordered_set.h" diff --git a/mindspore/ccsrc/debug/common.cc b/mindspore/ccsrc/debug/common.cc new file mode 100644 index 00000000000..6caf7e2c393 --- /dev/null +++ b/mindspore/ccsrc/debug/common.cc @@ -0,0 +1,125 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "debug/common.h" + +#include +#include +#include "utils/system/env.h" +#include "utils/system/file_system.h" +#include "utils/log_adapter.h" +#include "utils/context/ms_context.h" + +namespace mindspore { +std::optional Common::GetRealPath(const std::string &input_path) { + std::string out_path; + auto path_split_pos = input_path.find_last_of('/'); + if (path_split_pos == std::string::npos) { + path_split_pos = input_path.find_last_of('\\'); + } + // get real path + char real_path[PATH_MAX] = {0}; + if (path_split_pos != std::string::npos) { + std::string prefix_path = input_path.substr(0, path_split_pos); + if (prefix_path.length() >= PATH_MAX) { + MS_LOG(ERROR) << "Prefix path is too longer!"; + return std::nullopt; + } + std::string last_path = input_path.substr(path_split_pos, input_path.length() - path_split_pos); + auto ret = CreateNotExistDirs(prefix_path); + if (!ret) { + MS_LOG(ERROR) << "CreateNotExistDirs Failed!"; + return std::nullopt; + } + + if (nullptr == realpath(prefix_path.c_str(), real_path)) { + MS_LOG(ERROR) << "dir " << prefix_path << " does 
not exit."; + return std::nullopt; + } + out_path = std::string(real_path) + last_path; + } + + if (path_split_pos == std::string::npos) { + if (input_path.length() >= PATH_MAX) { + MS_LOG(ERROR) << "Prefix path is too longer!"; + return std::nullopt; + } + if (nullptr == realpath(input_path.c_str(), real_path)) { + MS_LOG(ERROR) << "File " << input_path << " does not exit, it will be created."; + } + out_path = std::string(real_path); + } + return out_path; +} + +bool Common::CreateNotExistDirs(const std::string &path) { + std::shared_ptr fs = system::Env::GetFileSystem(); + MS_EXCEPTION_IF_NULL(fs); + char temp_path[PATH_MAX] = {0}; + if (path.length() > PATH_MAX) { + MS_LOG(ERROR) << "Path lens is max than " << PATH_MAX; + return false; + } + for (uint32_t i = 0; i < path.length(); i++) { + temp_path[i] = path[i]; + if (temp_path[i] == '\\' || temp_path[i] == '/') { + if (i != 0) { + char tmp_char = temp_path[i]; + temp_path[i] = '\0'; + std::string path_handle(temp_path); + if (!fs->FileExist(temp_path)) { + MS_LOG(INFO) << "Dir " << path_handle << " does not exit, creating..."; + if (!fs->CreateDir(temp_path)) { + MS_LOG(ERROR) << "Create " << path_handle << " dir error"; + return false; + } + } + temp_path[i] = tmp_char; + } + } + } + + if (!fs->FileExist(path)) { + MS_LOG(INFO) << "Dir " << path << " does not exit, creating..."; + if (!fs->CreateDir(path)) { + MS_LOG(ERROR) << "Create " << path << " dir error"; + return false; + } + } + return true; +} + +std::optional Common::GetConfigFile(const std::string &env) { + if (env.empty()) { + MS_LOG(EXCEPTION) << "Invalid env"; + } + auto config_path_str = std::getenv(env.c_str()); + if (config_path_str == nullptr) { + MS_LOG(ERROR) << "Please export env:" << env; + return {}; + } + MS_LOG(INFO) << "Async Dump Getenv env:" << env << "=" << config_path_str; + + std::string dump_config_file(config_path_str); + std::shared_ptr fs = system::Env::GetFileSystem(); + MS_EXCEPTION_IF_NULL(fs); + if 
(!fs->FileExist(dump_config_file)) { + MS_LOG(ERROR) << dump_config_file << " not exist."; + return {}; + } + return dump_config_file; +} +} // namespace mindspore diff --git a/mindspore/ccsrc/debug/common.h b/mindspore/ccsrc/debug/common.h new file mode 100644 index 00000000000..8d4a6cb4674 --- /dev/null +++ b/mindspore/ccsrc/debug/common.h @@ -0,0 +1,36 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_ +#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_ + +#include +#include +#include "utils/contract.h" + +namespace mindspore { +class Common { + public: + Common() = default; + ~Common() = default; + static std::optional GetRealPath(const std::string &input_path); + static std::optional GetConfigFile(const std::string &env); + + private: + static bool CreateNotExistDirs(const std::string &path); +}; +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_ diff --git a/mindspore/ccsrc/debug/data_dump_parser.cc b/mindspore/ccsrc/debug/data_dump_parser.cc new file mode 100644 index 00000000000..259ec388d32 --- /dev/null +++ b/mindspore/ccsrc/debug/data_dump_parser.cc @@ -0,0 +1,152 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "debug/data_dump_parser.h" + +#include +#include "utils/context/ms_context.h" +#include "debug/common.h" + +constexpr auto kDataDumpConfigPtah = "DATA_DUMP_CONFIG_PATH"; +constexpr auto kEnableDataDump = "ENABLE_DATA_DUMP"; +constexpr auto kDataDumpPath = "DATA_DUMP_PATH"; +namespace mindspore { +void DataDumpParser::ResetParam() { + enable_ = false; + net_name_.clear(); + dump_mode_ = 0; + dump_step_ = 0; + kernel_set_.clear(); +} + +bool DataDumpParser::DumpEnabled() const { + auto enable_dump = std::getenv(kEnableDataDump); + if (!enable_dump) { + MS_LOG(WARNING) << "[DataDump] enable dump is null. Please export ENABLE_DATA_DUMP"; + return false; + } + + auto enabled = std::atoi(enable_dump); + if (enabled != 1) { + MS_LOG(WARNING) << "[DataDump] Please export ENABLE_DATA_DUMP=1"; + return false; + } + + auto context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context); + if (context->execution_mode() == kPynativeMode) { + MS_LOG(EXCEPTION) << "[DataDump] PyNative mode not support data dump"; + } + return true; +} + +std::optional DataDumpParser::GetDumpPath() const { + auto dump_path = std::getenv(kDataDumpPath); + if (!dump_path) { + MS_LOG(ERROR) << "[DataDump] dump path is null. 
Please export DATA_DUMP_PATH"; + return {}; + } + std::string dump_path_str(dump_path); + return dump_path_str; +} + +void DataDumpParser::ParseDumpConfig() { + std::lock_guard guard(lock_); + MS_LOG(INFO) << "[DataDump] parse start"; + if (!DumpEnabled()) { + MS_LOG(INFO) << "[DataDump] dump not enable"; + return; + } + + ResetParam(); + + auto dump_config_file = Common::GetConfigFile(kDataDumpConfigPtah); + if (!dump_config_file.has_value()) { + MS_LOG(EXCEPTION) << "[DataDump] Get config file failed"; + } + + std::ifstream json_file(dump_config_file.value()); + if (!json_file.is_open()) { + MS_LOG(EXCEPTION) << "[DataDump] " << dump_config_file.value() << " open failed."; + } + + nlohmann::json j; + json_file >> j; + if (j.find("DumpSettings") == j.end()) { + MS_LOG(EXCEPTION) << "[DataDump] DumpSettings is not exist."; + } + + nlohmann::json dump_settings = j.at("DumpSettings"); + // convert json to string + std::stringstream ss; + ss << dump_settings; + std::string cfg = ss.str(); + MS_LOG(INFO) << "[DataDump] Async dump settings Json: " << cfg; + if (!IsConfigExist(dump_settings)) { + MS_LOG(EXCEPTION) << "[DataDump] Async dump json invalid"; + } + + if (!ParseDumpSetting(dump_settings)) { + MS_LOG(EXCEPTION) << "[DataDump] Parse dump json failed"; + } +} + +bool DataDumpParser::NeedDump(const std::string &op_full_name) const { + if (!DumpEnabled()) { + return false; + } + if (dump_mode_ == 0) { + return true; + } + auto iter = kernel_set_.find(op_full_name); + return iter != kernel_set_.end(); +} + +bool DataDumpParser::IsConfigExist(const nlohmann::json &dump_settings) const { + if (dump_settings.find("mode") == dump_settings.end() || dump_settings.find("net_name") == dump_settings.end() || + dump_settings.find("iteration") == dump_settings.end() || dump_settings.find("kernels") == dump_settings.end()) { + MS_LOG(ERROR) << "[DataDump] DumpSettings keys are not exist."; + return false; + } + return true; +} + +bool DataDumpParser::ParseDumpSetting(const 
nlohmann::json &dump_settings) { + auto mode = dump_settings.at("mode"); + auto net_name = dump_settings.at("net_name"); + auto iteration = dump_settings.at("iteration"); + auto kernels = dump_settings.at("kernels"); + if (!(mode.is_number() && net_name.is_string() && iteration.is_number() && kernels.is_array())) { + MS_LOG(ERROR) << "[DataDump] Element's type in Dump config json is invalid."; + enable_ = false; + return false; + } + + enable_ = true; + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + dump_mode_ = mode; + net_name_ = net_name; + dump_step_ = iteration; + for (const auto &kernel : kernels) { + auto kernel_str = kernel.dump(); + kernel_str.erase(std::remove(kernel_str.begin(), kernel_str.end(), '\"'), kernel_str.end()); + MS_LOG(INFO) << "[DataDump] Need dump kernel:" << kernel_str; + kernel_set_.insert(kernel_str); + } + return true; +} +} // namespace mindspore diff --git a/mindspore/ccsrc/debug/data_dump_parser.h b/mindspore/ccsrc/debug/data_dump_parser.h new file mode 100644 index 00000000000..751c61dd1a1 --- /dev/null +++ b/mindspore/ccsrc/debug/data_dump_parser.h @@ -0,0 +1,61 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_ +#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_ + +#include +#include +#include +#include +#include "nlohmann/json.hpp" +#include "common/utils.h" + +namespace mindspore { +class DataDumpParser { + public: + static DataDumpParser &GetInstance() { + static DataDumpParser instance; + return instance; + } + void ParseDumpConfig(); + bool NeedDump(const std::string &op_full_name) const; + bool DumpEnabled() const; + std::optional GetDumpPath() const; + bool enable() const { return enable_; } + const std::string &net_name() const { return net_name_; } + uint32_t dump_mode() const { return dump_mode_; } + uint32_t dump_step() const { return dump_step_; } + const std::set &kernel_set() const { return kernel_set_; } + + private: + DataDumpParser() = default; + virtual ~DataDumpParser() = default; + DISABLE_COPY_AND_ASSIGN(DataDumpParser); + + void ResetParam(); + bool IsConfigExist(const nlohmann::json &dump_settings) const; + bool ParseDumpSetting(const nlohmann::json &dump_settings); + + std::mutex lock_; + bool enable_{false}; + std::string net_name_; + uint32_t dump_mode_{0}; + uint32_t dump_step_{0}; + std::set kernel_set_; +}; +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_ diff --git a/mindspore/ccsrc/debug/debug_services.cc b/mindspore/ccsrc/debug/debug_services.cc index cb883eef515..cc6c5c53ad5 100644 --- a/mindspore/ccsrc/debug/debug_services.cc +++ b/mindspore/ccsrc/debug/debug_services.cc @@ -37,8 +37,8 @@ DebugServices &DebugServices::operator=(const DebugServices &other) { DebugServices::~DebugServices() { delete tensor_loader_; } -void DebugServices::add_watchpoint(unsigned int id, unsigned int watch_condition, - const std::vector> &check_node_list) { +void DebugServices::AddWatchpoint(unsigned int id, unsigned int watch_condition, + const std::vector> &check_node_list) { std::lock_guard lg(lock_); watchpoint_t 
watchpoint_item; @@ -57,14 +57,14 @@ void DebugServices::add_watchpoint(unsigned int id, unsigned int watch_condition watchpoint_table[id] = watchpoint_item; } -void DebugServices::remove_watchpoint(unsigned int id) { +void DebugServices::RemoveWatchpoint(unsigned int id) { std::lock_guard lg(lock_); watchpoint_table.erase(id); } -void DebugServices::check_watchpoints(std::vector *name, std::vector *slot, - std::vector *data_ptr, std::vector *data_size, - std::vector *condition, std::vector *wacthpoint_id) { +void DebugServices::CheckWatchpoints(std::vector *name, std::vector *slot, + std::vector *data_ptr, std::vector *data_size, + std::vector *condition, std::vector *wacthpoint_id) { std::lock_guard lg(lock_); std::vector> tensor_list = tensor_loader_->GetTensor(); @@ -171,9 +171,9 @@ void DebugServices::check_watchpoints(std::vector *name, std::vecto } } -void DebugServices::read_nodes_tensors(std::vector name, std::vector *ret_name, - std::vector *data_ptr, std::vector *data_size, - std::vector *dtype, std::vector> *shape) { +void DebugServices::ReadNodesTensors(std::vector name, std::vector *ret_name, + std::vector *data_ptr, std::vector *data_size, + std::vector *dtype, std::vector> *shape) { std::vector>> result_list; tensor_loader_->SearchTensors(name, &result_list); @@ -189,6 +189,28 @@ void DebugServices::read_nodes_tensors(std::vector name, std::vecto } } -TensorLoader *DebugServices::get_tensor_loader() const { return tensor_loader_; } +bool DebugServices::IsWatchPoint(std::string kernel_name, + std::unordered_map watchpoint_table) { + bool ret = false; + for (auto w_table_item : watchpoint_table) { + auto check_node_list = std::get<1>(w_table_item).check_node_list; + for (auto check_node : check_node_list) { + std::string w_name = std::get<0>(check_node); + bool w_type = std::get<1>(check_node); + if ((w_type == true && + ((kernel_name.find(w_name) != string::npos && kernel_name.rfind(w_name, 0) == 0) || w_name == "*")) || + (w_type == false && 
kernel_name == w_name)) { + ret = true; + return ret; + } + } + } + return ret; +} + +TensorLoader *DebugServices::tensor_loader() const { return tensor_loader_; } +std::unordered_map DebugServices::GetWatchpointTable() { + return watchpoint_table; +} } // namespace mindspore diff --git a/mindspore/ccsrc/debug/debug_services.h b/mindspore/ccsrc/debug/debug_services.h index b2fd41cd683..41400af1d57 100644 --- a/mindspore/ccsrc/debug/debug_services.h +++ b/mindspore/ccsrc/debug/debug_services.h @@ -37,22 +37,6 @@ class DebugServices { ~DebugServices(); - void add_watchpoint(unsigned int id, unsigned int watch_condition, - const std::vector> &check_node_list); - - void remove_watchpoint(unsigned int id); - - void check_watchpoints(std::vector *name, std::vector *slot, std::vector *data_ptr, - std::vector *data_size, std::vector *condition, - std::vector *wacthpoint_id); - - void read_nodes_tensors(std::vector name, std::vector *ret_name, - std::vector *data_ptr, std::vector *data_size, - std::vector *dtype, std::vector> *shape); - - TensorLoader *get_tensor_loader() const; - - private: typedef struct condition_no_param { bool enabled = false; } condition_no_param_t; @@ -84,6 +68,26 @@ class DebugServices { std::vector> check_node_list; } watchpoint_t; + void AddWatchpoint(unsigned int id, unsigned int watch_condition, + const std::vector> &check_node_list); + + void RemoveWatchpoint(unsigned int id); + + void CheckWatchpoints(std::vector *name, std::vector *slot, std::vector *data_ptr, + std::vector *data_size, std::vector *condition, + std::vector *wacthpoint_id); + + void ReadNodesTensors(std::vector name, std::vector *ret_name, + std::vector *data_ptr, std::vector *data_size, + std::vector *dtype, std::vector> *shape); + + bool IsWatchPoint(std::string kernel_name, std::unordered_map watchpoint_table); + + TensorLoader *tensor_loader() const; + + std::unordered_map GetWatchpointTable(); + + private: std::mutex lock_; std::unordered_map watchpoint_table; diff --git 
a/mindspore/ccsrc/debug/debugger/debug_graph.proto b/mindspore/ccsrc/debug/debugger/debug_graph.proto index 042360fac37..0930791ac07 100644 --- a/mindspore/ccsrc/debug/debugger/debug_graph.proto +++ b/mindspore/ccsrc/debug/debugger/debug_graph.proto @@ -313,4 +313,10 @@ message TensorProto { // If the tensor content transferring is finished. optional bool finished = 6; + + // The iteration of the tensor. Supported: "prev" or leave empty. + optional string iter = 7; + + // If the tensor name should be truncated. + optional bool truncate = 8; } \ No newline at end of file diff --git a/mindspore/ccsrc/debug/debugger/debugger.cc b/mindspore/ccsrc/debug/debugger/debugger.cc index ea147a929f5..dd89e17e2db 100644 --- a/mindspore/ccsrc/debug/debugger/debugger.cc +++ b/mindspore/ccsrc/debug/debugger/debugger.cc @@ -19,8 +19,8 @@ #include #include #include "debug/debugger/debugger.h" -#include "pipeline/pipeline.h" -#include "session/anf_runtime_algorithm.h" +#include "pipeline/jit/pipeline.h" +#include "backend/session/anf_runtime_algorithm.h" using debugger::EventReply; using debugger::GraphProto; @@ -43,7 +43,8 @@ Debugger::Debugger() device_id_(0), num_step_(0), debugger_enabled_(false), - is_dataset_graph_(false) {} + is_dataset_graph_(false), + partial_memory_(false) {} void Debugger::Init(const uint32_t device_id) { // access lock for public method @@ -57,6 +58,7 @@ void Debugger::EnableDebugger() { // reset some of the class members num_step_ = 0; debugger_enabled_ = false; + partial_memory_ = false; grpc_client_ = nullptr; debug_services_ = nullptr; @@ -72,7 +74,8 @@ void Debugger::EnableDebugger() { MS_LOG(WARNING) << "Not enabling debugger. 
Set environment variable ENABLE_MS_DEBUGGER=1 to enable debugger."; return; } - // configure host + + // configure grpc host const char *env_host_str = std::getenv("MS_DEBUGGER_HOST"); std::string host; if (env_host_str != nullptr) { @@ -82,7 +85,7 @@ void Debugger::EnableDebugger() { MS_LOG(WARNING) << "Environment variable MS_DEBUGGER_HOST doesn't exist. Using default debugger host: localhost"; host = "localhost"; } - // configure port + // configure grpc port const char *env_port_str = std::getenv("MS_DEBUGGER_PORT"); std::string port; if (env_port_str != nullptr) { @@ -93,6 +96,27 @@ void Debugger::EnableDebugger() { port = "50051"; } + // configure partial memory reuse + const char *env_partial_mem_str = std::getenv("MS_DEBUGGER_PARTIAL_MEM"); + if (env_partial_mem_str != nullptr) { + MS_LOG(INFO) << "Getenv MS_DEBUGGER_PARTIAL_MEM: " << env_partial_mem_str; + if (std::strcmp(env_partial_mem_str, "1") == 0) { + partial_memory_ = true; + } + } + // switch memory reuse on or off + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + context_ptr->set_enable_mem_reuse(partial_memory_); + // print some message about memory reuse to user + if (partial_memory_) { + MS_LOG(WARNING) << "Partial Memory Reuse is enabled. Note: 1. Please only set watchpoints before running the first " + "step. 2. Tensor values are only available for nodes that are watched by any watchpoint."; + } else { + MS_LOG(WARNING) << "Memory Reuse is disabled. 
Set environment variable MS_DEBUGGER_PARTIAL_MEM=1 to reduce memory " + "usage for large models."; + } + // initialize grpc client grpc_client_ = std::make_unique(host, port); debug_services_ = std::make_unique(); @@ -106,6 +130,7 @@ void Debugger::Reset() { num_step_ = 0; debugger_enabled_ = false; is_dataset_graph_ = false; + partial_memory_ = false; graph_ptr_ = nullptr; grpc_client_ = nullptr; debug_services_ = nullptr; @@ -178,7 +203,7 @@ void Debugger::CheckDatasetGraph() { is_dataset_graph_ = false; } -GraphProto Debugger::GetGraphProto() { +GraphProto Debugger::GetGraphProto() const { // convert kernel graph to debugger modelproto ModelProto model = GetDebuggerFuncGraphProto(graph_ptr_); return model.graph(); @@ -261,12 +286,9 @@ void Debugger::CommandLoop() { MS_LOG(INFO) << "node name: " << node.node_name(); MS_LOG(INFO) << "node type: " << node.node_type(); } - WatchCondition recieved_condition = GetWatchcondition(reply); - MS_LOG(INFO) << "condition: " << recieved_condition.condition(); - int32_t id = GetWatchpointID(reply); - MS_LOG(INFO) << "id: " << id; - bool delete_ = GetWatchpointDelete(reply); - MS_LOG(INFO) << "delete: " << delete_; + MS_LOG(INFO) << "condition: " << GetWatchcondition(reply).condition(); + MS_LOG(INFO) << "id: " << GetWatchpointID(reply); + MS_LOG(INFO) << "delete: " << GetWatchpointDelete(reply); } MS_LOG(INFO) << "Setting watchpoint"; if (GetWatchpointDelete(reply)) { @@ -284,15 +306,20 @@ void Debugger::CommandLoop() { MS_LOG(INFO) << "tensor node name: " << tensor.node_name(); MS_LOG(INFO) << "tensor slot: " << tensor.slot(); MS_LOG(INFO) << "tensor finished: " << std::boolalpha << tensor.finished() << std::noboolalpha; + MS_LOG(INFO) << "tensor iter: " << tensor.iter(); + MS_LOG(INFO) << "tensor truncate: " << std::boolalpha << tensor.truncate() << std::noboolalpha; } } MS_LOG(INFO) << "Sending tensors"; std::list tensors = LoadTensors(GetTensors(reply)); { + // print view cmd reply for (auto tensor : tensors) { 
MS_LOG(INFO) << "tensor node name: " << tensor.node_name(); MS_LOG(INFO) << "tensor slot: " << tensor.slot(); MS_LOG(INFO) << "tensor finished: " << std::boolalpha << tensor.finished() << std::noboolalpha; + MS_LOG(INFO) << "tensor iter: " << tensor.iter(); + MS_LOG(INFO) << "tensor truncate: " << std::boolalpha << tensor.truncate() << std::noboolalpha; MS_LOG(INFO) << "tensor dims: "; for (auto dim : tensor.dims()) { MS_LOG(INFO) << dim << ","; @@ -309,7 +336,115 @@ void Debugger::CommandLoop() { } } -DebuggerCommand Debugger::GetCommand(const EventReply &reply) { +void Debugger::SetWatchpoint(const ProtoVector &nodes, const WatchCondition &condition, const int32_t id) { + std::vector> check_node_list; + std::transform(nodes.begin(), nodes.end(), std::back_inserter(check_node_list), + [](WatchNode node) -> std::tuple { + return make_tuple(node.node_name(), node.node_type() == "scope"); + }); + debug_services_->AddWatchpoint(id, condition.condition(), check_node_list); +} + +void Debugger::RemoveWatchpoint(const int32_t id) { debug_services_->RemoveWatchpoint(id); } + +std::list Debugger::LoadTensors(const ProtoVector &tensors) const { + std::vector name; + std::vector ret_name; + std::vector data_ptr; + std::vector data_size; + std::vector dtype; + std::vector> shape; + + std::transform(tensors.begin(), tensors.end(), std::back_inserter(name), GetTensorFullName); + + // ret_name will contain tensor names that are found in TensorLoader + // items in ret_name will be in the same order with tensors if found + debug_services_->ReadNodesTensors(name, &ret_name, &data_ptr, &data_size, &dtype, &shape); + + std::list tensor_list; + unsigned int result_index = 0; + for (auto tensor : tensors) { + TensorProto tensor_item; + tensor_item.set_node_name(tensor.node_name()); + tensor_item.set_slot(tensor.slot()); + tensor_item.set_iter(tensor.iter()); + tensor_item.set_truncate(tensor.truncate()); + tensor_item.clear_tensor_content(); + tensor_item.clear_data_type(); + 
tensor_item.clear_dims(); + // always set finished to true before big tensor splitting is supported + tensor_item.set_finished(true); + + // return empty tensor if didn't find the requested tensor + if (result_index >= ret_name.size() || ret_name[result_index] != GetTensorFullName(tensor)) { + tensor_list.push_back(tensor_item); + continue; + } + + tensor_item.set_tensor_content(data_ptr[result_index], data_size[result_index]); + tensor_item.set_data_type(GetDebuggerNumberDataType(dtype[result_index])); + for (auto &elem : shape[result_index]) { + tensor_item.add_dims(elem); + } + + // add tensor to result list and increment result_index to check next item in ret_name + tensor_list.push_back(tensor_item); + result_index++; + } + return tensor_list; +} + +void Debugger::Exit() { + // clear resource before exit + pipeline::ClearResAtexit(); + std::exit(EXIT_FAILURE); +} + +std::list Debugger::CheckWatchpoints() const { + std::vector name; + std::vector slot; + std::vector data_ptr; + std::vector data_size; + std::vector condition; + std::vector watchpoint_id; + + debug_services_->CheckWatchpoints(&name, &slot, &data_ptr, &data_size, &condition, &watchpoint_id); + std::list hits; + for (unsigned int i = 0; i < name.size(); i++) { + WatchpointHit hit; + hit.set_id(watchpoint_id[i]); + + // here TensorProto act as a tensor indicator, not sending tensor content + TensorProto *tensor_item = hit.mutable_tensor(); + tensor_item->set_node_name(name[i]); + tensor_item->set_slot(slot[i]); + tensor_item->set_finished(true); + + WatchCondition *condition_item = hit.mutable_watch_condition(); + condition_item->set_condition(debugger::WatchCondition_Condition(condition[i])); + + hits.push_back(hit); + } + return hits; +} + +void Debugger::SendWatchpointsAndSuspend(const std::list &points) { + // send info about watchpoint + if (!points.empty()) { + EventReply reply = grpc_client_->SendWatchpointHits(points); + if (reply.status() != reply.OK) { + MS_LOG(ERROR) << "Error: 
SendWatchpointHits failed"; + } + } + // enter command loop + CommandLoop(); +} + +DebugServices *Debugger::debug_services() const { return debug_services_.get(); } + +bool Debugger::debugger_enabled() const { return debugger_enabled_; } + +DebuggerCommand GetCommand(const EventReply &reply) { DebuggerCommand cmd = DebuggerCommand::kUnknownCMD; switch (reply.cmd_case()) { case debugger::EventReply::CmdCase::kExit: @@ -331,7 +466,7 @@ DebuggerCommand Debugger::GetCommand(const EventReply &reply) { return cmd; } -ProtoVector Debugger::GetWatchnodes(const EventReply &reply) { +ProtoVector GetWatchnodes(const EventReply &reply) { if (!reply.has_set_cmd()) { MS_LOG(ERROR) << "Error: Not SetCMD, can not get WatchNodes. Returning default value: ProtoVector()."; return ProtoVector(); @@ -339,7 +474,7 @@ ProtoVector Debugger::GetWatchnodes(const EventReply &reply) { return reply.set_cmd().watch_nodes(); } -WatchCondition Debugger::GetWatchcondition(const EventReply &reply) { +WatchCondition GetWatchcondition(const EventReply &reply) { if (!reply.has_set_cmd() || !reply.set_cmd().has_watch_condition()) { MS_LOG(ERROR) << "Error: Can not get WatchCondition from command. Returning default value: WatchCondition()."; return WatchCondition(); @@ -347,7 +482,7 @@ WatchCondition Debugger::GetWatchcondition(const EventReply &reply) { return reply.set_cmd().watch_condition(); } -int32_t Debugger::GetWatchpointID(const EventReply &reply) { +int32_t GetWatchpointID(const EventReply &reply) { if (!reply.has_set_cmd()) { MS_LOG(ERROR) << "Error: Not SetCMD, can not get Watchpoint ID. Returning default value: 0."; return 0; @@ -355,7 +490,7 @@ int32_t Debugger::GetWatchpointID(const EventReply &reply) { return reply.set_cmd().id(); } -bool Debugger::GetWatchpointDelete(const EventReply &reply) { +bool GetWatchpointDelete(const EventReply &reply) { if (!reply.has_set_cmd()) { MS_LOG(ERROR) << "Error: Not SetCMD, can not get Watchpoint delete flag. 
Returning default value: false."; return false; @@ -363,7 +498,7 @@ bool Debugger::GetWatchpointDelete(const EventReply &reply) { return reply.set_cmd().delete_(); } -ProtoVector Debugger::GetTensors(const EventReply &reply) { +ProtoVector GetTensors(const EventReply &reply) { if (!reply.has_view_cmd()) { MS_LOG(ERROR) << "Error: Not ViewCMD, can not get Tensors. Returning default value: ProtoVector()."; return ProtoVector(); @@ -371,118 +506,17 @@ ProtoVector Debugger::GetTensors(const EventReply &reply) { return reply.view_cmd().tensors(); } -void Debugger::SetWatchpoint(const ProtoVector &nodes, const WatchCondition &condition, const int32_t id) { - std::vector> check_node_list; - std::transform(nodes.begin(), nodes.end(), std::back_inserter(check_node_list), - [](WatchNode node) -> std::tuple { - return make_tuple(node.node_name(), node.node_type() == "scope"); - }); - - debug_services_->add_watchpoint(id, condition.condition(), check_node_list); -} - -void Debugger::RemoveWatchpoint(const int32_t id) { debug_services_->remove_watchpoint(id); } - -std::list Debugger::LoadTensors(const ProtoVector &tensors) { - std::vector name; - std::vector ret_name; - std::vector data_ptr; - std::vector data_size; - std::vector dtype; - std::vector> shape; - - std::transform(tensors.begin(), tensors.end(), std::back_inserter(name), - [](TensorProto tensor) -> std::string { return tensor.node_name() + ":" + tensor.slot(); }); - - debug_services_->read_nodes_tensors(name, &ret_name, &data_ptr, &data_size, &dtype, &shape); - - std::list tensor_list; - unsigned int result_index = 0; - TensorProto tensor_item; - - for (auto tensor : tensors) { - tensor_item.set_node_name(tensor.node_name()); - tensor_item.set_slot(tensor.slot()); - tensor_item.set_finished(true); - - // return empty tensor if didn't find the requested tensor - if (result_index >= ret_name.size() || ret_name[result_index] != tensor.node_name() + ":" + tensor.slot()) { - tensor_list.push_back(tensor_item); - 
continue; - } - - tensor_item.set_tensor_content(data_ptr[result_index], data_size[result_index]); - tensor_item.set_data_type(GetDebuggerNumberDataType(dtype[result_index])); - tensor_item.clear_dims(); - for (auto &elem : shape[result_index]) { - tensor_item.add_dims(elem); - } - - tensor_list.push_back(tensor_item); - - result_index++; +std::string GetTensorFullName(const TensorProto &tensor) { + string node_name = tensor.node_name(); + if (tensor.truncate()) { + // scopes in node name are seperated by '/' + // use the name without scope if truncate is true + std::size_t found = node_name.find_last_of("/"); + node_name = node_name.substr(found + 1); } - - return tensor_list; + return node_name + ":" + tensor.slot() + (tensor.iter() == "" ? "" : ":" + tensor.iter()); } -void Debugger::Exit() { - // clear resource before exit - pipeline::ClearResAtexit(); - std::exit(EXIT_FAILURE); -} - -std::list Debugger::CheckWatchpoints() { - std::vector name; - std::vector slot; - std::vector data_ptr; - std::vector data_size; - std::vector condition; - std::vector watchpoint_id; - - debug_services_->check_watchpoints(&name, &slot, &data_ptr, &data_size, &condition, &watchpoint_id); - - std::list points; - - for (unsigned int i = 0; i < name.size(); i++) { - TensorProto *tensor_item; - tensor_item = new TensorProto(); - tensor_item->set_node_name(name[i]); - tensor_item->set_slot(slot[i]); - tensor_item->set_tensor_content(data_ptr[i], data_size[i]); - - // finished in TensorProto will always be true before we implement big tensor splitting - tensor_item->set_finished(true); - - WatchCondition *condition_item; - condition_item = new WatchCondition(); - condition_item->set_condition(debugger::WatchCondition_Condition(condition[i])); - - WatchpointHit point; - point.set_allocated_tensor(tensor_item); - point.set_allocated_watch_condition(condition_item); - point.set_id(watchpoint_id[i]); - - points.push_back(point); - } - - return points; -} - -void 
Debugger::SendWatchpointsAndSuspend(const std::list &points) { - // send info about watchpoint - if (!points.empty()) { - EventReply reply = grpc_client_->SendWatchpointHits(points); - if (reply.status() != reply.OK) { - MS_LOG(ERROR) << "Error: SendWatchpointHits failed"; - } - } - // enter command loop - CommandLoop(); -} - -DebugServices *Debugger::get_debug_services() { return debug_services_.get(); } - -bool Debugger::debugger_enabled() { return debugger_enabled_; } +bool Debugger::partial_memory() { return partial_memory_; } } // namespace mindspore diff --git a/mindspore/ccsrc/debug/debugger/debugger.h b/mindspore/ccsrc/debug/debugger/debugger.h index 6ce7d036257..5a3965d7cc9 100644 --- a/mindspore/ccsrc/debug/debugger/debugger.h +++ b/mindspore/ccsrc/debug/debugger/debugger.h @@ -19,7 +19,7 @@ #include #include #include -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" #include "debug/debugger/grpc_client.h" #include "debug/debug_services.h" @@ -72,9 +72,11 @@ class Debugger : public std::enable_shared_from_this { // suspend the execution after a debug_op void PostDebugOp(); - DebugServices *get_debug_services(); + DebugServices *debug_services() const; - bool debugger_enabled(); + bool debugger_enabled() const; + + bool partial_memory(); private: // private constructor for singleton @@ -92,7 +94,7 @@ class Debugger : public std::enable_shared_from_this { void CheckDatasetGraph(); // serialize graph and get proto - GraphProto GetGraphProto(); + GraphProto GetGraphProto() const; // send graph and enter command wait loop void SendGraphAndSuspend(const GraphProto &graph_proto); @@ -102,16 +104,6 @@ class Debugger : public std::enable_shared_from_this { // break if RunCMD void CommandLoop(); - // process reply and command type - DebuggerCommand GetCommand(const EventReply &reply); - - // parse other data out of EventReply - ProtoVector GetWatchnodes(const EventReply &reply); - WatchCondition GetWatchcondition(const EventReply 
&reply); - int32_t GetWatchpointID(const EventReply &reply); - bool GetWatchpointDelete(const EventReply &reply); - ProtoVector GetTensors(const EventReply &reply); - // set what nodes and conditions to watch void SetWatchpoint(const ProtoVector &nodes, const WatchCondition &condition, const int32_t id); @@ -119,14 +111,14 @@ class Debugger : public std::enable_shared_from_this { void RemoveWatchpoint(const int32_t id); // load tensor for view command - std::list LoadTensors(const ProtoVector &tensors); + std::list LoadTensors(const ProtoVector &tensors) const; // terminate training process void Exit(); // analyze tensors and check watchpoint conditions // return names of tensors and what condition they hit - std::list CheckWatchpoints(); + std::list CheckWatchpoints() const; // send watchpoints that hit and enter command wait loop void SendWatchpointsAndSuspend(const std::list &points); @@ -139,6 +131,7 @@ class Debugger : public std::enable_shared_from_this { int32_t num_step_; bool debugger_enabled_; bool is_dataset_graph_; + bool partial_memory_; std::mutex access_lock_; // singleton @@ -155,5 +148,18 @@ ModelProto GetDebuggerFuncGraphProto(const FuncGraphPtr &func_graph); // for getting proto DataType from Type of Tensor DataType GetDebuggerNumberDataType(const TypePtr &type); +// process reply and command type +DebuggerCommand GetCommand(const EventReply &reply); + +// parse other data out of EventReply +ProtoVector GetWatchnodes(const EventReply &reply); +WatchCondition GetWatchcondition(const EventReply &reply); +int32_t GetWatchpointID(const EventReply &reply); +bool GetWatchpointDelete(const EventReply &reply); +ProtoVector GetTensors(const EventReply &reply); + +// get the full name of a tensor, which is the name used in TensorLoader +std::string GetTensorFullName(const TensorProto &tensor); + } // namespace mindspore #endif // MINDSPORE_CCSRC_DEBUG_DEBUGGER_DEBUGGER_H_ diff --git a/mindspore/ccsrc/debug/draw.cc b/mindspore/ccsrc/debug/draw.cc index 
573452eac08..ff8132fb284 100644 --- a/mindspore/ccsrc/debug/draw.cc +++ b/mindspore/ccsrc/debug/draw.cc @@ -25,11 +25,11 @@ #include "pybind11/pybind11.h" #include "ir/meta_func_graph.h" -#include "ir/param_value_py.h" +#include "ir/param_value.h" #include "ir/primitive.h" #include "utils/graph_utils.h" #include "utils/utils.h" -#include "operator/composite/composite.h" +#include "frontend/operator/composite/composite.h" #include "ir/tensor.h" namespace py = pybind11; @@ -321,18 +321,9 @@ void BaseDigraph::FuncGraphParameters(const FuncGraphPtr &key) { buffer_ << parameter->ToString(); auto param = parameter->cast(); if (param->has_default()) { - auto param_value = std::dynamic_pointer_cast(param->default_param()); - auto py_p = param_value->value(); - if (py::hasattr(py_p, "default_input")) { - py_p = py_p.attr("default_input"); - std::vector shape; - if (py::hasattr(py_p, PYTHON_TENSOR_FLAG)) { - auto m_tensor = py_p.cast>(); - shape = m_tensor->shape(); - } else if (py::hasattr(py_p, PYTHON_META_TENSOR_FLAG)) { - auto m_tensor = py_p.cast>(); - shape = m_tensor->shape(); - } + auto tensor = param->default_param()->value(); + if (tensor) { + auto &shape = tensor->shape(); std::ostringstream shape_str; std::copy(shape.begin(), shape.end(), std::ostream_iterator(shape_str, ",")); buffer_ << "[" << shape_str.str() << "]"; diff --git a/mindspore/ccsrc/debug/draw.h b/mindspore/ccsrc/debug/draw.h index 7804c6e94a6..cb670fe0f6e 100644 --- a/mindspore/ccsrc/debug/draw.h +++ b/mindspore/ccsrc/debug/draw.h @@ -22,7 +22,7 @@ #include #include "ir/anf.h" #include "utils/any.h" -#include "pipeline/parse/resolve.h" +#include "pipeline/jit/parse/resolve.h" namespace mindspore { namespace draw { diff --git a/mindspore/ccsrc/debug/dump_proto.cc b/mindspore/ccsrc/debug/dump_proto.cc index 99440537c76..35cdfafe26e 100644 --- a/mindspore/ccsrc/debug/dump_proto.cc +++ b/mindspore/ccsrc/debug/dump_proto.cc @@ -453,6 +453,7 @@ void ProtoExporter::ExportCNode(const FuncGraphPtr 
&func_graph, const CNodePtr & GetOpNodeTypeAndAttrs(func_graph, op, node_proto); node_proto->set_name(std::to_string(apply_idx)); node_proto->set_scope(node->scope()->name()); + node_proto->set_full_name(node->fullname_with_scope()); // process OP inputs for (size_t i = 1; i < inputs.size(); ++i) { diff --git a/mindspore/ccsrc/debug/e2e_dump.cc b/mindspore/ccsrc/debug/e2e_dump.cc index 78a331fc278..9037a6d00b3 100644 --- a/mindspore/ccsrc/debug/e2e_dump.cc +++ b/mindspore/ccsrc/debug/e2e_dump.cc @@ -17,12 +17,14 @@ #include #include #include +#include #include #include "utils/log_adapter.h" #include "utils/system/file_system.h" #include "utils/system/env.h" #include "utils/convert_utils.h" #include "utils/context/ms_context.h" +#include "debug/common.h" using json = nlohmann::json; @@ -158,100 +160,19 @@ bool Dump::DumpToFile(const std::string &filename, const void *data, size_t len) return false; } - std::string realpath; - bool ret = GetRealPath(filename, &realpath); - if (!ret) { + auto realpath = Common::GetRealPath(filename); + if (!realpath.has_value()) { MS_LOG(ERROR) << "Get real path failed."; return false; } std::ofstream fd; - fd.open(realpath, std::ios::binary | std::ios::out); + fd.open(realpath.value(), std::ios::binary | std::ios::out); if (!fd.is_open()) { - MS_LOG(ERROR) << "Open file " << realpath << " fail."; + MS_LOG(ERROR) << "Open file " << realpath.value() << " fail."; return false; } (void)fd.write(reinterpret_cast(data), SizeToLong(len)); fd.close(); return true; } - -bool Dump::GetRealPath(const std::string &inpath, std::string *outpath) { - MS_EXCEPTION_IF_NULL(outpath); - auto path_split_pos = inpath.find_last_of('/'); - if (path_split_pos == std::string::npos) { - path_split_pos = inpath.find_last_of('\\'); - } - // get real path - char real_path[PATH_MAX] = {0}; - if (path_split_pos != std::string::npos) { - std::string prefix_path = inpath.substr(0, path_split_pos); - if (prefix_path.length() >= PATH_MAX) { - MS_LOG(ERROR) << "Prefix 
path is too longer!"; - return false; - } - std::string last_path = inpath.substr(path_split_pos, inpath.length() - path_split_pos); - auto ret = CreateNotExistDirs(prefix_path); - if (ret == false) { - MS_LOG(ERROR) << "CreateNotExistDirs Failed!"; - return false; - } - - if (nullptr == realpath(prefix_path.c_str(), real_path)) { - MS_LOG(ERROR) << "dir " << prefix_path << " does not exit."; - return false; - } - *outpath = std::string(real_path) + last_path; - } - - if (path_split_pos == std::string::npos) { - if (inpath.length() >= PATH_MAX) { - MS_LOG(ERROR) << "Prefix path is too longer!"; - return false; - } - if (nullptr == realpath(inpath.c_str(), real_path)) { - MS_LOG(ERROR) << "File " << inpath << " does not exit, it will be created."; - } - *outpath = std::string(real_path); - } - - return true; -} - -bool Dump::CreateNotExistDirs(const std::string &path) { - std::shared_ptr fs = system::Env::GetFileSystem(); - MS_EXCEPTION_IF_NULL(fs); - char temp_path[PATH_MAX] = {0}; - if (path.length() > PATH_MAX) { - MS_LOG(ERROR) << "Path lens is max than " << PATH_MAX; - return false; - } - for (uint32_t i = 0; i < path.length(); i++) { - temp_path[i] = path[i]; - if (temp_path[i] == '\\' || temp_path[i] == '/') { - if (i != 0) { - char tmp_char = temp_path[i]; - temp_path[i] = '\0'; - std::string path_handle(temp_path); - if (!fs->FileExist(temp_path)) { - MS_LOG(INFO) << "Dir " << path_handle << " does not exit, creating..."; - if (!fs->CreateDir(temp_path)) { - MS_LOG(ERROR) << "Create " << path_handle << " dir error"; - return false; - } - } - temp_path[i] = tmp_char; - } - } - } - - if (!fs->FileExist(path)) { - MS_LOG(INFO) << "Dir " << path << " does not exit, creating..."; - if (!fs->CreateDir(path)) { - MS_LOG(ERROR) << "Create " << path << " dir error"; - return false; - } - } - - return true; -} } // namespace mindspore diff --git a/mindspore/ccsrc/debug/e2e_dump.h b/mindspore/ccsrc/debug/e2e_dump.h index 4c3e8308da7..acde1626cb4 100644 --- 
a/mindspore/ccsrc/debug/e2e_dump.h +++ b/mindspore/ccsrc/debug/e2e_dump.h @@ -59,10 +59,6 @@ class Dump { uint32_t cur_iter_; std::vector dump_kernels_; - static bool GetRealPath(const std::string &inpath, std::string *outpath); - - static bool CreateNotExistDirs(const std::string &path); - private: bool ParseDumpConfig(const std::string &dump_config_file); bool IsConfigExist(const nlohmann::json &dumpSettings); diff --git a/mindspore/ccsrc/debug/info.h b/mindspore/ccsrc/debug/info.h index c09c6031b3d..39475a4606f 100644 --- a/mindspore/ccsrc/debug/info.h +++ b/mindspore/ccsrc/debug/info.h @@ -24,7 +24,7 @@ #include #include -#include "ir/base.h" +#include "base/base.h" #include "debug/trace_info.h" namespace mindspore { diff --git a/mindspore/ccsrc/debug/tensor_data.h b/mindspore/ccsrc/debug/tensor_data.h index 9704d69089b..00af2032083 100644 --- a/mindspore/ccsrc/debug/tensor_data.h +++ b/mindspore/ccsrc/debug/tensor_data.h @@ -51,25 +51,13 @@ class TensorData { int GetExecutionOrder() { return this->execution_order; } - int SetExecutionOrder(int execution_order) { - this->execution_order = execution_order; - return true; - } + void SetExecutionOrder(int execution_order) { this->execution_order = execution_order; } - int SetName(const std::string &name) { - this->name = name; - return true; - } + void SetName(const std::string &name) { this->name = name; } - bool SetTensor(mindspore::tensor::TensorPtr out_tensor) { - this->tensor_ptr = out_tensor; - return true; - } + void SetTensor(mindspore::tensor::TensorPtr out_tensor) { this->tensor_ptr = out_tensor; } - bool SetSlot(size_t slot) { - this->slot = slot; - return true; - } + void SetSlot(size_t slot) { this->slot = slot; } }; } // namespace mindspore #endif // MINDSPORE_CCSRC_DEBUG_TENSOR_DATA_H_ diff --git a/mindspore/ccsrc/debug/tensor_load.h b/mindspore/ccsrc/debug/tensor_load.h index 6c3ea67a785..ae0e89aae27 100644 --- a/mindspore/ccsrc/debug/tensor_load.h +++ b/mindspore/ccsrc/debug/tensor_load.h @@ 
-19,17 +19,28 @@ #include #include #include +#include #include #include +#include #include "debug/tensor_data.h" namespace mindspore { class TensorLoader { public: TensorLoader() : iter_num(-1) {} - ~TensorLoader() {} + ~TensorLoader() { EmptyTensor(); } - bool LoadNewTensor(std::shared_ptr tensor) { + bool LoadNewTensor(std::shared_ptr tensor, bool keep_prev) { + std::lock_guard lg(lock_); + if (keep_prev) { + // add prev step tensor into current step map with ":prev" suffix + auto handle = prev_tensor_list_map.extract(tensor->GetName()); + if (!handle.empty()) { + handle.key() = tensor->GetName() + ":prev"; + tensor_list_map.insert(std::move(handle)); + } + } tensor_list.push_back(tensor); tensor_list_map.insert({tensor->GetName(), tensor}); return true; @@ -52,18 +63,23 @@ class TensorLoader { } } - bool EmptyTensor() { - tensor_list_map.clear(); + void EmptyTensor() { + std::lock_guard lg(lock_); + prev_tensor_list_map.clear(); + tensor_list_map.swap(prev_tensor_list_map); tensor_list.clear(); - return true; } + void EmptyPrevTensor() { prev_tensor_list_map.clear(); } + void set_iter_num(uint32_t iter_num) { this->iter_num = iter_num; } private: std::vector> tensor_list; std::map> tensor_list_map; + std::map> prev_tensor_list_map; uint32_t iter_num; + std::mutex lock_; }; } // namespace mindspore #endif // MINDSPORE_CCSRC_DEBUG_TENSOR_LOAD_H_ diff --git a/mindspore/ccsrc/debug/trace.cc b/mindspore/ccsrc/debug/trace.cc index e12a7b12092..b8d3f0a7c78 100644 --- a/mindspore/ccsrc/debug/trace.cc +++ b/mindspore/ccsrc/debug/trace.cc @@ -29,10 +29,10 @@ #include "ir/meta_func_graph.h" #include "utils/graph_utils.h" -#include "operator/composite/composite.h" +#include "frontend/operator/composite/composite.h" #include "ir/tensor.h" #include "debug/anf_ir_utils.h" -#include "pipeline/static_analysis/evaluator.h" +#include "pipeline/jit/static_analysis/evaluator.h" namespace mindspore { // namespace to support debug trace infomation diff --git 
a/mindspore/ccsrc/debug/trace.h b/mindspore/ccsrc/debug/trace.h index 9583997e934..7cf45abe30a 100644 --- a/mindspore/ccsrc/debug/trace.h +++ b/mindspore/ccsrc/debug/trace.h @@ -27,7 +27,7 @@ #include "debug/info.h" #include "ir/anf.h" #include "ir/func_graph.h" -#include "pipeline/static_analysis/static_analysis.h" +#include "pipeline/jit/static_analysis/static_analysis.h" #include "utils/any.h" namespace mindspore { diff --git a/mindspore/ccsrc/debug/trace_info.h b/mindspore/ccsrc/debug/trace_info.h index cf4f0c080ad..62908cb449e 100644 --- a/mindspore/ccsrc/debug/trace_info.h +++ b/mindspore/ccsrc/debug/trace_info.h @@ -24,7 +24,7 @@ #include #include -#include "ir/base.h" +#include "base/base.h" namespace mindspore { class TraceInfo; diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc b/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc deleted file mode 100644 index 42c611c3af0..00000000000 --- a/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc +++ /dev/null @@ -1,94 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include "device/ascend/ascend_memory_manager.h" -#include "device/ascend/ascend_memory_pool.h" -#include "utils/context/ms_context.h" -#include "runtime/mem.h" -namespace mindspore { -namespace device { -namespace ascend { -constexpr uint64_t kAscendDeviceMemGB = 26; -constexpr uint64_t kAscendMemPoolGB = 4; -constexpr uint64_t kMemSizeGB = 30; -constexpr uint64_t kMaxMemSizeGB = 30; -constexpr uint64_t kAscendDeviceMemSize = (kAscendDeviceMemGB << kMemSizeGB); -constexpr uint64_t kAscendMemPoolSize = (kAscendMemPoolGB << kMemSizeGB); - -void AscendMemoryManager::MallocDeviceMemory() { - auto context_mem = GetDeviceMemSizeFromContext(); - device_mem_size_ = context_mem == 0 ? kAscendDeviceMemSize : context_mem; - static_mem_offset_ = device_mem_size_; - auto ret = rtMalloc(reinterpret_cast(&device_mem_base_), static_mem_offset_, RT_MEMORY_HBM); - if (ret != RT_ERROR_NONE) { - MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << static_mem_offset_ << "] fail, ret[" << ret << "]"; - } - - if (context_mem == 0) { - device_mem_pool_size_ = kAscendMemPoolSize; - ret = rtMalloc(reinterpret_cast(&device_mem_pool_base_), device_mem_pool_size_, RT_MEMORY_HBM); - if (ret != RT_ERROR_NONE) { - MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]"; - } - AscendMemoryPool::GetInstance().set_device_mem_pool_base(device_mem_pool_base_); - AscendMemoryPool::GetInstance().set_device_mem_pool_size(device_mem_pool_size_); - } -} - -uint64_t AscendMemoryManager::GetDeviceMemSizeFromContext() { - auto context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context); - auto variable_memory_max_size = context->variable_memory_max_size(); - if (variable_memory_max_size == "0") { - return 0; - } - MS_LOG(INFO) << "context variable_memory_max_size:" << variable_memory_max_size; - auto pos = variable_memory_max_size.find('*'); - if (pos == std::string::npos) { - MS_LOG(EXCEPTION) << "Invalid 
variable_memory_max_size"; - } - auto gb_str = variable_memory_max_size.substr(0, pos); - auto gb_var = std::stoull(gb_str); - MS_LOG(INFO) << "variable_memory_max_size(GB):" << gb_var; - if (gb_var > kMaxMemSizeGB || gb_var == 0) { - MS_LOG(EXCEPTION) << "Invalid allocate memory size:" << gb_var << " which should be in (0-30]GB"; - } - return gb_var << kMemSizeGB; -} - -void AscendMemoryManager::FreeDeviceMemory() { - if (device_mem_base_ != nullptr) { - auto ret = rtFree(device_mem_base_); - if (ret != RT_ERROR_NONE) { - MS_LOG(ERROR) << "rtFree mem size[" << device_mem_size_ << "] fail, ret[" << ret << "]"; - } - device_mem_base_ = nullptr; - } - if (device_mem_pool_base_ != nullptr) { - auto ret = rtFree(device_mem_pool_base_); - if (ret != RT_ERROR_NONE) { - MS_LOG(ERROR) << "rtFree mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]"; - } - device_mem_pool_base_ = nullptr; - } -} - -void *AscendMemoryManager::MallocMemFromMemPool(size_t size) { - return AscendMemoryPool::GetInstance().AllocTensorMem(size); -} -} // namespace ascend -} // namespace device -} // namespace mindspore diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_pool.cc b/mindspore/ccsrc/device/ascend/ascend_memory_pool.cc deleted file mode 100644 index 69c6dca5760..00000000000 --- a/mindspore/ccsrc/device/ascend/ascend_memory_pool.cc +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "device/ascend/ascend_memory_pool.h" -#include "device/ascend/ascend_kernel_runtime.h" -#include "utils/log_adapter.h" - -namespace mindspore { -namespace device { -namespace ascend { -size_t AscendMemoryPool::AllocDeviceMem(size_t size, DeviceMemPtr *addr) { - if (has_malloc_) { - MS_LOG(EXCEPTION) << "Has alloc memory pool memory !"; - } - if (size == 0 || size > free_mem_size_) { - MS_LOG(EXCEPTION) << "Failed to alloc memory pool memory !"; - } - *addr = device_mem_pool_base_; - if (*addr == nullptr) { - MS_LOG(EXCEPTION) << "Device memory pool base is nullptr, failed to alloc memory pool memory!"; - } - has_malloc_ = true; - free_mem_size_ -= size; - return size; -} - -bool AscendMemoryPool::FreeDeviceMem(const DeviceMemPtr &addr) { - MS_EXCEPTION_IF_NULL(addr); - has_malloc_ = false; - free_mem_size_ = total_mem_size_; - return true; -} - -size_t AscendMemoryPool::AlignMemorySize(size_t size) const { - if (size == 0) { - return DYNAMIC_MEM_ALIGN_SIZE; - } - return ((size + DYNAMIC_MEM_ALIGN_SIZE + 31) / DYNAMIC_MEM_ALIGN_SIZE) * DYNAMIC_MEM_ALIGN_SIZE; -} - -size_t AscendMemoryPool::mem_alloc_unit_size() const { return free_mem_size_ - 512; } - -void AscendMemoryPool::set_device_mem_pool_base(uint8_t *device_mem_pool_base) { - MS_EXCEPTION_IF_NULL(device_mem_pool_base); - device_mem_pool_base_ = device_mem_pool_base; -} - -size_t AscendMemoryPool::free_mem_size() { return free_mem_size_; } - -size_t AscendMemoryPool::total_mem_size() { return total_mem_size_; } -} // namespace ascend -} // namespace device -} // namespace mindspore diff --git a/mindspore/ccsrc/device/gpu/distribution/mpi_wrapper.cc b/mindspore/ccsrc/device/gpu/distribution/mpi_wrapper.cc deleted file mode 100644 index 46b574c575d..00000000000 --- a/mindspore/ccsrc/device/gpu/distribution/mpi_wrapper.cc +++ /dev/null @@ -1,87 +0,0 @@ -/** - * Copyright 2019 Huawei 
Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "device/gpu/distribution/mpi_wrapper.h" - -#include -#include -#include "device/gpu/distribution/nccl_wrapper.h" - -namespace mindspore { -namespace device { -namespace gpu { -MPIWrapper::MPIWrapper() : rank_id_(0), rank_size_(0), local_rank_id_(0) { Init(); } - -MPIWrapper::~MPIWrapper() { - int finalized; - MPI_Finalized(&finalized); - if (finalized == 0) { - MPI_Finalize(); - } -} - -MPIWrapper &MPIWrapper::instance() { - static MPIWrapper instance; - return instance; -} - -int MPIWrapper::local_rank_id() const { return local_rank_id_; } - -void MPIWrapper::Init() { - int initialized; - CHECK_RET(MPI_Initialized(&initialized), MPI_SUCCESS, "Failed to check mpi initialization status."); - - if (initialized == 0) { - MPI_Init(nullptr, nullptr); - } - CHECK_RET(MPI_Comm_rank(MPI_COMM_WORLD, &rank_id_), MPI_SUCCESS, "Failed to init mpi rank id."); - CHECK_RET(MPI_Comm_size(MPI_COMM_WORLD, &rank_size_), MPI_SUCCESS, "Failed to init mpi rank size."); - NCCLWrapper::instance().set_rank(rank_id_, rank_size_); - AssignLocalRankId(); - - ncclUniqueId unique_id; - if (rank_id_ == 0) { - unique_id = NCCLWrapper::instance().nccl_unique_id(); - } - CHECK_RET(MPI_Bcast(reinterpret_cast(&unique_id), sizeof(unique_id), MPI_BYTE, 0, MPI_COMM_WORLD), - MPI_SUCCESS, "Failed to broadcast nccl unique id."); - NCCLWrapper::instance().set_nccl_unique_id(unique_id); - return; -} - -void 
MPIWrapper::AssignLocalRankId() { - char host_name[MAX_HOSTNAME_LEN] = {0}; - CHECK_RET(gethostname(host_name, MAX_HOSTNAME_LEN), 0, "Getting host name failed."); - size_t host_hash = std::hash()(host_name); - - const int kRankSize = rank_size_; - size_t all_host_hashs[kRankSize]; - all_host_hashs[rank_id_] = host_hash; - CHECK_RET(MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, all_host_hashs, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD), - MPI_SUCCESS, "MPI_Allgather host hashs failed."); - for (int global_rank = 0; global_rank < kRankSize; global_rank++) { - if (global_rank == rank_id_) { - break; - } - if (all_host_hashs[global_rank] == all_host_hashs[rank_id_]) { - local_rank_id_++; - } - } - return; -} -} // namespace gpu -} // namespace device -} // namespace mindspore diff --git a/mindspore/ccsrc/operator/CMakeLists.txt b/mindspore/ccsrc/frontend/operator/CMakeLists.txt similarity index 72% rename from mindspore/ccsrc/operator/CMakeLists.txt rename to mindspore/ccsrc/frontend/operator/CMakeLists.txt index 88bcf0e532b..0b6dd77c690 100644 --- a/mindspore/ccsrc/operator/CMakeLists.txt +++ b/mindspore/ccsrc/frontend/operator/CMakeLists.txt @@ -1,3 +1,3 @@ file(GLOB_RECURSE _OPERATOR_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_OPERATOR_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ANALYZER) -add_library(_mindspore_operator_obj OBJECT ${_OPERATOR_SRC_FILES}) +add_library(_mindspore_frontend_operator_obj OBJECT ${_OPERATOR_SRC_FILES}) diff --git a/mindspore/ccsrc/operator/cc_implementations.cc b/mindspore/ccsrc/frontend/operator/cc_implementations.cc similarity index 99% rename from mindspore/ccsrc/operator/cc_implementations.cc rename to mindspore/ccsrc/frontend/operator/cc_implementations.cc index 52b71f410f4..3ec3455be75 100644 --- a/mindspore/ccsrc/operator/cc_implementations.cc +++ b/mindspore/ccsrc/frontend/operator/cc_implementations.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "operator/cc_implementations.h" +#include "frontend/operator/cc_implementations.h" #include #include #include diff --git a/mindspore/ccsrc/operator/cc_implementations.h b/mindspore/ccsrc/frontend/operator/cc_implementations.h similarity index 100% rename from mindspore/ccsrc/operator/cc_implementations.h rename to mindspore/ccsrc/frontend/operator/cc_implementations.h diff --git a/mindspore/ccsrc/operator/composite/composite.cc b/mindspore/ccsrc/frontend/operator/composite/composite.cc similarity index 99% rename from mindspore/ccsrc/operator/composite/composite.cc rename to mindspore/ccsrc/frontend/operator/composite/composite.cc index 75532b9fbdf..7d2573e50ab 100644 --- a/mindspore/ccsrc/operator/composite/composite.cc +++ b/mindspore/ccsrc/frontend/operator/composite/composite.cc @@ -17,19 +17,19 @@ * limitations under the License. */ -#include "operator/composite/composite.h" +#include "frontend/operator/composite/composite.h" #include #include #include #include "ir/anf.h" #include "ir/func_graph.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pipeline/static_analysis/abstract_function.h" -#include "pipeline/static_analysis/dshape.h" -#include "pipeline/static_analysis/param_validator.h" -#include "operator/cc_implementations.h" -#include "optimizer/opt.h" +#include "abstract/abstract_value.h" +#include "pipeline/jit/static_analysis/abstract_function.h" +#include "abstract/dshape.h" +#include "abstract/param_validator.h" +#include "frontend/operator/cc_implementations.h" +#include "frontend/optimizer/opt.h" #include "utils/symbolic.h" #include "pybind_api/api_register.h" #include "./common.h" diff --git a/mindspore/ccsrc/operator/composite/composite.h b/mindspore/ccsrc/frontend/operator/composite/composite.h similarity index 95% rename from mindspore/ccsrc/operator/composite/composite.h rename to mindspore/ccsrc/frontend/operator/composite/composite.h index 5944c81fb01..3821192dbaf 100644 --- 
a/mindspore/ccsrc/operator/composite/composite.h +++ b/mindspore/ccsrc/frontend/operator/composite/composite.h @@ -26,12 +26,12 @@ #include #include #include -#include "operator/composite/zip_operation.h" -#include "operator/composite/list_append_operation.h" -#include "operator/composite/do_signature.h" -#include "operator/composite/unpack_call.h" -#include "operator/composite/multitype_funcgraph.h" -#include "pipeline/static_analysis/static_analysis.h" +#include "frontend/operator/composite/zip_operation.h" +#include "frontend/operator/composite/list_append_operation.h" +#include "frontend/operator/composite/do_signature.h" +#include "frontend/operator/composite/unpack_call.h" +#include "frontend/operator/composite/multitype_funcgraph.h" +#include "pipeline/jit/static_analysis/static_analysis.h" #include "utils/misc.h" #include "utils/any.h" #include "ir/dtype.h" diff --git a/mindspore/ccsrc/operator/composite/do_signature.cc b/mindspore/ccsrc/frontend/operator/composite/do_signature.cc similarity index 89% rename from mindspore/ccsrc/operator/composite/do_signature.cc rename to mindspore/ccsrc/frontend/operator/composite/do_signature.cc index d9bcef30316..50be3c5b29a 100644 --- a/mindspore/ccsrc/operator/composite/do_signature.cc +++ b/mindspore/ccsrc/frontend/operator/composite/do_signature.cc @@ -14,16 +14,16 @@ * limitations under the License. 
*/ -#include "operator/composite/do_signature.h" +#include "frontend/operator/composite/do_signature.h" #include #include -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" #include "ir/anf.h" -#include "pipeline/static_analysis/dshape.h" -#include "pipeline/static_analysis/param_validator.h" -#include "operator/cc_implementations.h" -#include "optimizer/opt.h" +#include "abstract/dshape.h" +#include "abstract/param_validator.h" +#include "frontend/operator/cc_implementations.h" +#include "frontend/optimizer/opt.h" #include "utils/symbolic.h" #include "./common.h" #include "pybind_api/api_register.h" @@ -31,12 +31,10 @@ namespace mindspore { // namespace to support composite operators definition namespace prim { -namespace { -using PatternListType = std::initializer_list; const std::map type_map = {{kNumberTypeBool, 1}, {kNumberTypeInt8, 2}, {kNumberTypeUInt8, 3}, {kNumberTypeInt16, 4}, {kNumberTypeInt32, 5}, {kNumberTypeInt64, 6}, {kNumberTypeFloat16, 7}, {kNumberTypeFloat32, 8}, {kNumberTypeFloat64, 9}}; - +namespace { const std::vector &GetSignature(const ValuePtr &function) { static const auto empty = std::vector(); if (function->isa() && function->cast()->has_signature()) { @@ -108,6 +106,8 @@ TypeId GetMaxTypeId(const abstract::AbstractBasePtrList &args_spec_list, std::ve TypeId max_type_id = kTypeUnknown; size_t max_type_number = 0; bool has_int8 = false; + bool has_scalar_int32 = false; + bool has_scalar_float32 = false; for (const auto &index : indices) { TypeId arg_type_id = kTypeUnknown; TypeId arg_type = kTypeUnknown; @@ -116,6 +116,11 @@ TypeId GetMaxTypeId(const abstract::AbstractBasePtrList &args_spec_list, std::ve continue; } if (arg_type != kObjectTypeTensorType) { + if (arg_type_id == kNumberTypeInt32) { + has_scalar_int32 = true; + } else if (arg_type_id == kNumberTypeFloat32) { + has_scalar_float32 = true; + } continue; } auto it = type_map.find(arg_type_id); @@ -137,6 +142,17 @@ TypeId GetMaxTypeId(const 
abstract::AbstractBasePtrList &args_spec_list, std::ve if (max_type_id == kNumberTypeUInt8 && has_int8 == true) { max_type_id = kNumberTypeInt16; } + // if bool is the max type, see if there is scalar input + // if so, it means that max is bool tensor, use scalar type instead. + // for example: Tensor([True, True]) * 2, expect result is Tensor([2, 2]) + if (max_type_id == kNumberTypeBool) { + if (has_scalar_int32) { + max_type_id = kNumberTypeInt32; + } + if (has_scalar_float32) { + max_type_id = kNumberTypeFloat32; + } + } return max_type_id; } @@ -225,11 +241,7 @@ void DoAutoCast(const std::string &func_name, const std::vector &sign if (it_name_map == type_name_map.end()) { continue; } - MS_LOG(EXCEPTION) << "In op '" << func_name << "', \n" - << "the type of writable argument is '" << it_map->second << "', " - << "but the largest type in the same SignatureEumDtype is '" << it_name_map->second - << "'. The writable arg type is not equal to the largest type, " - << "so can not cast automatically."; + RaiseExceptionForConvertRefDtype(func_name, it_map->second, it_name_map->second); } continue; } @@ -313,5 +325,14 @@ FuncGraphPtr DoSignatureMetaFuncGraph::GenerateFuncGraph(const AbstractBasePtrLi func_graph->set_flag(FUNC_GRAPH_FLAG_CORE, true); return func_graph; } + +void RaiseExceptionForConvertRefDtype(const std::string &func_name, const std::string &ref_type, + const std::string &target_type) { + MS_LOG(EXCEPTION) << "In op '" << func_name << "', \n" + << "the type of writable argument is '" << ref_type << "', " + << "but the largest type in the same SignatureEumDtype is '" << target_type + << "'. 
The writable arg type is not equal to the largest type, " + << "so can not cast automatically."; +} } // namespace prim } // namespace mindspore diff --git a/mindspore/ccsrc/operator/composite/do_signature.h b/mindspore/ccsrc/frontend/operator/composite/do_signature.h similarity index 88% rename from mindspore/ccsrc/operator/composite/do_signature.h rename to mindspore/ccsrc/frontend/operator/composite/do_signature.h index 3e1596d63f4..9139be806ac 100644 --- a/mindspore/ccsrc/operator/composite/do_signature.h +++ b/mindspore/ccsrc/frontend/operator/composite/do_signature.h @@ -25,7 +25,7 @@ #include #include -#include "pipeline/static_analysis/static_analysis.h" +#include "pipeline/jit/static_analysis/static_analysis.h" #include "utils/misc.h" #include "utils/any.h" #include "ir/dtype.h" @@ -56,6 +56,11 @@ class DoSignatureMetaFuncGraph : public MetaFuncGraph { }; using RWSignaturePtr = std::shared_ptr; +extern const std::map type_map; + +void RaiseExceptionForConvertRefDtype(const std::string &func_name, const std::string &ref_type, + const std::string &target_type); + AnfNodePtr GenerateCNode(const FuncGraphPtr &func_graph, const std::string &func_name, const ValuePtr &function, const AbstractBasePtrList &args_spec_list, const AnfNodePtrList &old_node_inputs); } // namespace prim diff --git a/mindspore/ccsrc/operator/composite/list_append_operation.cc b/mindspore/ccsrc/frontend/operator/composite/list_append_operation.cc similarity index 93% rename from mindspore/ccsrc/operator/composite/list_append_operation.cc rename to mindspore/ccsrc/frontend/operator/composite/list_append_operation.cc index 236a5b7062a..3dfe2e23d02 100644 --- a/mindspore/ccsrc/operator/composite/list_append_operation.cc +++ b/mindspore/ccsrc/frontend/operator/composite/list_append_operation.cc @@ -14,14 +14,14 @@ * limitations under the License. 
*/ -#include "operator/composite/list_append_operation.h" +#include "frontend/operator/composite/list_append_operation.h" #include #include #include -#include "pipeline/static_analysis/param_validator.h" -#include "optimizer/opt.h" +#include "abstract/param_validator.h" +#include "frontend/optimizer/opt.h" #include "pybind_api/api_register.h" namespace mindspore { diff --git a/mindspore/ccsrc/operator/composite/list_append_operation.h b/mindspore/ccsrc/frontend/operator/composite/list_append_operation.h similarity index 100% rename from mindspore/ccsrc/operator/composite/list_append_operation.h rename to mindspore/ccsrc/frontend/operator/composite/list_append_operation.h diff --git a/mindspore/ccsrc/operator/composite/map.cc b/mindspore/ccsrc/frontend/operator/composite/map.cc similarity index 97% rename from mindspore/ccsrc/operator/composite/map.cc rename to mindspore/ccsrc/frontend/operator/composite/map.cc index 2149285323b..a5f674187b3 100644 --- a/mindspore/ccsrc/operator/composite/map.cc +++ b/mindspore/ccsrc/frontend/operator/composite/map.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "operator/composite/map.h" +#include "frontend/operator/composite/map.h" #include #include #include @@ -22,12 +22,12 @@ #include "ir/anf.h" #include "ir/func_graph.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pipeline/static_analysis/abstract_function.h" -#include "pipeline/static_analysis/dshape.h" +#include "abstract/abstract_value.h" +#include "pipeline/jit/static_analysis/abstract_function.h" +#include "abstract/dshape.h" #include "pybind_api/api_register.h" #include "debug/trace.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "./common.h" namespace mindspore { diff --git a/mindspore/ccsrc/operator/composite/map.h b/mindspore/ccsrc/frontend/operator/composite/map.h similarity index 98% rename from mindspore/ccsrc/operator/composite/map.h rename to mindspore/ccsrc/frontend/operator/composite/map.h index 02d374214ad..428014f9c46 100644 --- a/mindspore/ccsrc/operator/composite/map.h +++ b/mindspore/ccsrc/frontend/operator/composite/map.h @@ -24,7 +24,7 @@ #include "ir/dtype.h" #include "ir/meta_func_graph.h" -#include "operator/composite/multitype_funcgraph.h" +#include "frontend/operator/composite/multitype_funcgraph.h" namespace mindspore { // namespace to support composite operators definition diff --git a/mindspore/ccsrc/operator/composite/multitype_funcgraph.cc b/mindspore/ccsrc/frontend/operator/composite/multitype_funcgraph.cc similarity index 76% rename from mindspore/ccsrc/operator/composite/multitype_funcgraph.cc rename to mindspore/ccsrc/frontend/operator/composite/multitype_funcgraph.cc index de6526f6423..ba0d3d9ebb8 100644 --- a/mindspore/ccsrc/operator/composite/multitype_funcgraph.cc +++ b/mindspore/ccsrc/frontend/operator/composite/multitype_funcgraph.cc @@ -17,19 +17,20 @@ * limitations under the License. 
*/ -#include "operator/composite/multitype_funcgraph.h" +#include "frontend/operator/composite/multitype_funcgraph.h" #include #include #include #include "ir/anf.h" #include "ir/func_graph.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pipeline/static_analysis/abstract_function.h" -#include "pipeline/static_analysis/dshape.h" -#include "pipeline/static_analysis/param_validator.h" -#include "operator/cc_implementations.h" -#include "optimizer/opt.h" +#include "abstract/abstract_value.h" +#include "pipeline/jit/static_analysis/abstract_function.h" +#include "abstract/dshape.h" +#include "abstract/param_validator.h" +#include "frontend/operator/cc_implementations.h" +#include "frontend/optimizer/opt.h" +#include "utils/context/ms_context.h" #include "utils/symbolic.h" #include "pybind_api/api_register.h" #include "./common.h" @@ -115,36 +116,43 @@ const py::function MultitypeFuncGraph::SignMatch(const TypePtrList &types) { } return item.second; } - // Try best match - py::function py_fn_subclass; - size_t subclass_match_cnt = 0; - for (auto &item : fn_cache_py_) { - TypePtrList sign = item.first; - if (sign.size() != types.size()) { - continue; + return py::none(); +} + +FuncGraphPtr GenerateStubFunc(const TypePtrList &types) { + auto context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context); + bool enable_sparse = context->enable_sparse(); + if (!enable_sparse) { + return nullptr; + } + + std::vector parameters; + ParameterPtr undetermined_param = nullptr; + auto stub = std::make_shared(); + for (size_t i = 0; i < types.size(); ++i) { + auto param = stub->add_parameter(); + parameters.push_back(param); + if (types[i]->type_id() == kObjectTypeUndeterminedType) { + undetermined_param = param; } - auto match = true; - for (size_t i = 0; i < sign.size(); ++i) { - if (!IsIdentidityOrSubclass(UnwrapRef(types[i]), sign[i]) && - !IsParentOrChildrenType(UnwrapRef(types[i]), sign[i])) { - match = false; - break; + } + if (undetermined_param != 
nullptr) { + std::vector inputs{NewValueNode(prim::kPrimMakeTuple)}; + for (size_t i = 0; i < types.size(); ++i) { + if (types[i]->type_id() == kObjectTypeFunction) { + std::vector call_prim{parameters[i], undetermined_param}; + inputs.push_back(stub->NewCNode(call_prim)); + } else { + inputs.push_back(parameters[i]); } } - if (!match) { - continue; - } - py_fn_subclass = item.second; - subclass_match_cnt++; + auto stub_output = stub->NewCNode(inputs); + stub->set_output(stub_output); + stub->set_stub(true); + return stub; } - if (subclass_match_cnt > 1) { - MS_LOG(EXCEPTION) << "There are more than one prototypes for overload function match by subclass"; - } - if (subclass_match_cnt == 1) { - MS_LOG(DEBUG) << "Found one subclass match"; - return py_fn_subclass; - } - return py::none(); + return nullptr; } FuncGraphPtr MultitypeFuncGraph::GenerateFromTypes(const TypePtrList &types) { @@ -159,6 +167,11 @@ FuncGraphPtr MultitypeFuncGraph::GenerateFromTypes(const TypePtrList &types) { MS_LOG(DEBUG) << "Find overload function " << buffer.str() << ", function: " << func_graph->ToString(); return func_graph; } + auto stub = GenerateStubFunc(types); + if (stub != nullptr) { + MS_LOG(DEBUG) << "GenerateStubFunc " << buffer.str() << ", function: " << stub->ToString(); + return stub; + } std::ostringstream oss; oss << "There are " << fn_cache_py_.size() << " prototypes for overload function `" << name_ << "`, corresponding location info:\n"; diff --git a/mindspore/ccsrc/operator/composite/multitype_funcgraph.h b/mindspore/ccsrc/frontend/operator/composite/multitype_funcgraph.h similarity index 97% rename from mindspore/ccsrc/operator/composite/multitype_funcgraph.h rename to mindspore/ccsrc/frontend/operator/composite/multitype_funcgraph.h index ababf218831..2139a0e9d10 100644 --- a/mindspore/ccsrc/operator/composite/multitype_funcgraph.h +++ b/mindspore/ccsrc/frontend/operator/composite/multitype_funcgraph.h @@ -26,7 +26,7 @@ #include #include #include -#include 
"pipeline/static_analysis/static_analysis.h" +#include "pipeline/jit/static_analysis/static_analysis.h" #include "utils/misc.h" #include "ir/dtype.h" #include "ir/meta_func_graph.h" diff --git a/mindspore/ccsrc/operator/composite/unpack_call.cc b/mindspore/ccsrc/frontend/operator/composite/unpack_call.cc similarity index 93% rename from mindspore/ccsrc/operator/composite/unpack_call.cc rename to mindspore/ccsrc/frontend/operator/composite/unpack_call.cc index 3993d415973..2c9e0b538f3 100644 --- a/mindspore/ccsrc/operator/composite/unpack_call.cc +++ b/mindspore/ccsrc/frontend/operator/composite/unpack_call.cc @@ -14,17 +14,17 @@ * limitations under the License. */ -#include "operator/composite/unpack_call.h" +#include "frontend/operator/composite/unpack_call.h" #include #include #include "./common.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pipeline/static_analysis/dshape.h" -#include "pipeline/static_analysis/param_validator.h" -#include "operator/cc_implementations.h" +#include "abstract/abstract_value.h" +#include "abstract/dshape.h" +#include "abstract/param_validator.h" +#include "frontend/operator/cc_implementations.h" #include "ir/anf.h" -#include "optimizer/opt.h" +#include "frontend/optimizer/opt.h" #include "utils/symbolic.h" #include "pybind_api/api_register.h" diff --git a/mindspore/ccsrc/operator/composite/unpack_call.h b/mindspore/ccsrc/frontend/operator/composite/unpack_call.h similarity index 96% rename from mindspore/ccsrc/operator/composite/unpack_call.h rename to mindspore/ccsrc/frontend/operator/composite/unpack_call.h index 8c055a93864..79c2600f363 100644 --- a/mindspore/ccsrc/operator/composite/unpack_call.h +++ b/mindspore/ccsrc/frontend/operator/composite/unpack_call.h @@ -25,7 +25,7 @@ #include #include -#include "pipeline/static_analysis/static_analysis.h" +#include "pipeline/jit/static_analysis/static_analysis.h" #include "utils/misc.h" #include "utils/any.h" #include "ir/dtype.h" diff --git 
a/mindspore/ccsrc/operator/composite/zip_operation.cc b/mindspore/ccsrc/frontend/operator/composite/zip_operation.cc similarity index 94% rename from mindspore/ccsrc/operator/composite/zip_operation.cc rename to mindspore/ccsrc/frontend/operator/composite/zip_operation.cc index 38f2b516148..9e2b6d28b22 100644 --- a/mindspore/ccsrc/operator/composite/zip_operation.cc +++ b/mindspore/ccsrc/frontend/operator/composite/zip_operation.cc @@ -16,14 +16,14 @@ * limitations under the License. */ -#include "operator/composite/zip_operation.h" +#include "frontend/operator/composite/zip_operation.h" #include -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" #include "ir/anf.h" -#include "pipeline/static_analysis/dshape.h" -#include "operator/cc_implementations.h" -#include "optimizer/opt.h" +#include "abstract/dshape.h" +#include "frontend/operator/cc_implementations.h" +#include "frontend/optimizer/opt.h" #include "pybind_api/api_register.h" namespace mindspore { diff --git a/mindspore/ccsrc/operator/composite/zip_operation.h b/mindspore/ccsrc/frontend/operator/composite/zip_operation.h similarity index 97% rename from mindspore/ccsrc/operator/composite/zip_operation.h rename to mindspore/ccsrc/frontend/operator/composite/zip_operation.h index 1a3fa1f5fe9..96697cb472d 100644 --- a/mindspore/ccsrc/operator/composite/zip_operation.h +++ b/mindspore/ccsrc/frontend/operator/composite/zip_operation.h @@ -27,7 +27,7 @@ #include #include -#include "pipeline/static_analysis/static_analysis.h" +#include "pipeline/jit/static_analysis/static_analysis.h" #include "utils/misc.h" #include "utils/any.h" #include "ir/dtype.h" diff --git a/mindspore/ccsrc/operator/ops.cc b/mindspore/ccsrc/frontend/operator/ops.cc similarity index 99% rename from mindspore/ccsrc/operator/ops.cc rename to mindspore/ccsrc/frontend/operator/ops.cc index b682847ed72..5c7672ee3c6 100755 --- a/mindspore/ccsrc/operator/ops.cc +++ b/mindspore/ccsrc/frontend/operator/ops.cc @@ 
-14,7 +14,7 @@ * limitations under the License. */ -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include #include diff --git a/mindspore/ccsrc/operator/ops.h b/mindspore/ccsrc/frontend/operator/ops.h similarity index 97% rename from mindspore/ccsrc/operator/ops.h rename to mindspore/ccsrc/frontend/operator/ops.h index f7780138961..0dea045a6ea 100755 --- a/mindspore/ccsrc/operator/ops.h +++ b/mindspore/ccsrc/frontend/operator/ops.h @@ -21,7 +21,7 @@ #include #include #include "ir/anf.h" -#include "ir/primitive_base.h" +#include "ir/primitive.h" namespace mindspore { // namespace to support primitive operators @@ -294,6 +294,12 @@ extern const PrimitivePtr kPrimIndexedSlicesGetIndices; extern const PrimitivePtr kPrimIndexedSlicesGetDenseShape; extern const PrimitivePtr kPrimIsIndexedSlices; +// attribute 'unroll_flag' of primitive 'switch', when 'unroll_flag' is '0', 'switch' will not unroll +const char SWITCH_UNROLL_FLAG[] = "unroll_flag"; +// max loop count of for statement, when loop count is less then this value, the for loop will be unrolled, otherwise it +// will be sunk(i.e. not unrolled) +const int MAX_FOR_LOOP_COUNT = 600; + class DoSignaturePrimitive : public Primitive { public: explicit DoSignaturePrimitive(const std::string &name, const ValuePtr &function) diff --git a/mindspore/ccsrc/operator/ops_extends.cc b/mindspore/ccsrc/frontend/operator/ops_extends.cc similarity index 90% rename from mindspore/ccsrc/operator/ops_extends.cc rename to mindspore/ccsrc/frontend/operator/ops_extends.cc index d415b45adf4..c406682c3ec 100755 --- a/mindspore/ccsrc/operator/ops_extends.cc +++ b/mindspore/ccsrc/frontend/operator/ops_extends.cc @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include #include -#include "pipeline/parse/python_adapter.h" -#include "pipeline/parse/data_converter.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "pipeline/jit/parse/data_converter.h" namespace mindspore { // namespace to support primitive operators diff --git a/mindspore/ccsrc/operator/prim_arrays.cc b/mindspore/ccsrc/frontend/operator/prim_arrays.cc similarity index 97% rename from mindspore/ccsrc/operator/prim_arrays.cc rename to mindspore/ccsrc/frontend/operator/prim_arrays.cc index 237ca795eb9..caaf1d1b2a7 100644 --- a/mindspore/ccsrc/operator/prim_arrays.cc +++ b/mindspore/ccsrc/frontend/operator/prim_arrays.cc @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "pipeline/static_analysis/prim.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/utils.h" -#include "operator/cc_implementations.h" -#include "pipeline/static_analysis/param_validator.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "frontend/operator/ops.h" +#include "abstract/utils.h" +#include "frontend/operator/cc_implementations.h" +#include "abstract/param_validator.h" namespace mindspore { namespace abstract { diff --git a/mindspore/ccsrc/operator/prim_debug.cc b/mindspore/ccsrc/frontend/operator/prim_debug.cc similarity index 89% rename from mindspore/ccsrc/operator/prim_debug.cc rename to mindspore/ccsrc/frontend/operator/prim_debug.cc index 5e6cdcc3183..718dadf5c15 100644 --- a/mindspore/ccsrc/operator/prim_debug.cc +++ b/mindspore/ccsrc/frontend/operator/prim_debug.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "pipeline/static_analysis/param_validator.h" -#include "pipeline/static_analysis/prim.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/utils.h" +#include "abstract/param_validator.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "frontend/operator/ops.h" +#include "abstract/utils.h" #include "utils/symbolic.h" namespace mindspore { diff --git a/mindspore/ccsrc/operator/prim_maths.cc b/mindspore/ccsrc/frontend/operator/prim_maths.cc similarity index 90% rename from mindspore/ccsrc/operator/prim_maths.cc rename to mindspore/ccsrc/frontend/operator/prim_maths.cc index 02b86603e74..e4543a38210 100644 --- a/mindspore/ccsrc/operator/prim_maths.cc +++ b/mindspore/ccsrc/frontend/operator/prim_maths.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "pipeline/static_analysis/prim.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/utils.h" -#include "pipeline/static_analysis/param_validator.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "frontend/operator/ops.h" +#include "abstract/utils.h" +#include "abstract/param_validator.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/operator/prim_nn.cc b/mindspore/ccsrc/frontend/operator/prim_nn.cc similarity index 99% rename from mindspore/ccsrc/operator/prim_nn.cc rename to mindspore/ccsrc/frontend/operator/prim_nn.cc index d9a00717571..96c86d815d4 100644 --- a/mindspore/ccsrc/operator/prim_nn.cc +++ b/mindspore/ccsrc/frontend/operator/prim_nn.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "pipeline/static_analysis/prim.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/utils.h" -#include "pipeline/static_analysis/param_validator.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "frontend/operator/ops.h" +#include "abstract/utils.h" +#include "abstract/param_validator.h" namespace mindspore { namespace abstract { diff --git a/mindspore/ccsrc/operator/prim_others.cc b/mindspore/ccsrc/frontend/operator/prim_others.cc similarity index 77% rename from mindspore/ccsrc/operator/prim_others.cc rename to mindspore/ccsrc/frontend/operator/prim_others.cc index ff9ec712bbe..530ad6a10c9 100644 --- a/mindspore/ccsrc/operator/prim_others.cc +++ b/mindspore/ccsrc/frontend/operator/prim_others.cc @@ -19,12 +19,12 @@ #include "ir/dtype.h" #include "common/utils.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/param_validator.h" -#include "pipeline/static_analysis/prim.h" -#include "pipeline/static_analysis/utils.h" -#include "utils/symbolic.h" +#include "frontend/operator/ops.h" +#include "abstract/param_validator.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "abstract/utils.h" #include "utils/context/ms_context.h" +#include "utils/symbolic.h" namespace mindspore { namespace abstract { @@ -56,79 +56,6 @@ AbstractBasePtr InferImplJ(const AnalysisEnginePtr &, const PrimitivePtr &primit return AbstractFunction::MakeAbstractFunction(jv); } -class UndeterminedShapeType { - public: - explicit UndeterminedShapeType(const std::string &env_str) { - // param_name indices_shape indices_type values_shape values_type dense_shape - // export UNDETERMINED_SPARSE_SHAPE_TYPES="sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 - // 2:Float32:3 1 2" - std::vector fields; - string tmp; - std::stringstream input(env_str); - while (std::getline(input, tmp, ':')) { - fields.push_back(tmp); - } - if (fields.size() != fields_num) { - MS_LOG(EXCEPTION) << "Expect " << fields_num << " fields, but got " << 
fields.size(); - } - - param_name_ = fields[0]; - - indices_shape_ = GetShape(fields[1]); - indices_type_ = StringToType(fields[2]); - - values_shape_ = GetShape(fields[3]); - values_type_ = StringToType(fields[4]); - - auto dense_shape_vec = GetShape(fields[5]); - AbstractBasePtrList dense_shape_list; - (void)std::transform(dense_shape_vec.begin(), dense_shape_vec.end(), std::back_inserter(dense_shape_list), - [](const auto &elem) { return FromValue(elem, false); }); - dense_shape_ = dense_shape_list; - } - ~UndeterminedShapeType() = default; - const std::string ¶m_name() { return param_name_; } - const std::vector &indices_shape() { return indices_shape_; } - const TypePtr &indices_type() { return indices_type_; } - const std::vector &values_shape() { return values_shape_; } - const TypePtr &values_type() { return values_type_; } - const AbstractBasePtrList &dense_shape() { return dense_shape_; } - - private: - std::string param_name_; - std::vector indices_shape_; - TypePtr indices_type_; - std::vector values_shape_; - TypePtr values_type_; - AbstractBasePtrList dense_shape_; - static const size_t fields_num; - - std::vector GetShape(const std::string &shape_str); -}; -std::vector UndeterminedShapeType::GetShape(const std::string &shape_str) { - std::vector ret; - std::istringstream iss(shape_str); - int elem; - while (iss.good()) { - iss >> elem; - ret.emplace_back(elem); - } - return ret; -} -const size_t UndeterminedShapeType::fields_num = 6; - -std::unordered_map g_undetermined_configs; -void InitUndeterminedFromEnv(const std::string &sparse_shape_types) { - std::string tmp; - std::stringstream input(sparse_shape_types); - g_undetermined_configs.clear(); - while (std::getline(input, tmp, ';')) { - auto config = UndeterminedShapeType(tmp); - g_undetermined_configs.insert(std::make_pair(config.param_name(), config)); - MS_LOG(DEBUG) << "Undetermined config from env: " << tmp; - } -} - AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const 
PrimitivePtr &primitive, const AbstractBasePtrList &args_spec_list) { MS_EXCEPTION_IF_NULL(primitive); @@ -142,45 +69,14 @@ AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const PrimitivePt MS_LOG(EXCEPTION) << "EnvGetItem evaluator args[1] should be a SymbolicKeyInstance but: " << key->ToString(); } - if (!key->sparse_grad().empty()) { - // Will be fixed once undetermined type ready - if (g_undetermined_configs.empty()) { - auto sparse_shape_types = common::GetEnv("UNDETERMINED_SPARSE_SHAPE_TYPES"); - MS_LOG(INFO) << "Undetermind sparse shape:" << sparse_shape_types; - if (sparse_shape_types.empty()) { - sparse_shape_types = "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2"; - } - InitUndeterminedFromEnv(sparse_shape_types); - } - - auto shape_types = g_undetermined_configs.find(key->sparse_grad()); - if (shape_types == g_undetermined_configs.end()) { - MS_LOG(EXCEPTION) << "Param " << key->ToString() - << " has sparse_grad, but shape/type is not configured in env UNDETERMINED_SPARSE_SHAPE_TYPES"; - } - MS_LOG(DEBUG) << "EnvGetItem is sparse_grad " << key->ToString(); - AbstractBasePtrList sparse_list; - // indices - auto indices_ele = std::make_shared(kAnyValue, shape_types->second.indices_type()); - auto indices = - std::make_shared(indices_ele, std::make_shared(shape_types->second.indices_shape())); - sparse_list.emplace_back(indices); - // values - auto dout_ele = std::make_shared(kAnyValue, shape_types->second.values_type()); - auto dout = std::make_shared(dout_ele, std::make_shared(shape_types->second.values_shape())); - sparse_list.emplace_back(dout); - // dense_shape - sparse_list.emplace_back(std::make_shared(shape_types->second.dense_shape())); - return std::make_shared(sparse_list); - } - auto context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context); - bool enable_sparse_flag = context->enable_sparse_flag(); - if (enable_sparse_flag && key->has_indexed_slices_grad() && dflt->isa()) { + bool 
enable_sparse = context->enable_sparse(); + if (enable_sparse && dflt->isa()) { auto dflt_tensor = dflt->cast(); return std::make_shared(dflt_tensor->element()->Clone(), dflt_tensor->shape()->Clone()); } + if (!key->GetValueTrack()->isa()) { return dflt; } @@ -242,10 +138,7 @@ AbstractBasePtr InferImplMakeRef(const AnalysisEnginePtr &, const PrimitivePtr & if (type->type_id() != kObjectTypeRefKey) { MS_LOG(EXCEPTION) << "First input of make_ref should be a RefKey but a " << type->ToString(); } - auto ret = std::make_shared(args_spec_list[0], args_spec_list[1], args_spec_list[2]); - ret->set_sparse_grad(args_spec_list[2]->sparse_grad()); - ret->set_has_indexed_slices_grad(args_spec_list[2]->has_indexed_slices_grad()); - return ret; + return std::make_shared(args_spec_list[0], args_spec_list[1], args_spec_list[2]); } AbstractBasePtr InferImplGetRefKey(const AnalysisEnginePtr &, const PrimitivePtr &, diff --git a/mindspore/ccsrc/operator/prim_statement.cc b/mindspore/ccsrc/frontend/operator/prim_statement.cc similarity index 96% rename from mindspore/ccsrc/operator/prim_statement.cc rename to mindspore/ccsrc/frontend/operator/prim_statement.cc index fc40e511e1c..bb421bdf8ab 100644 --- a/mindspore/ccsrc/operator/prim_statement.cc +++ b/mindspore/ccsrc/frontend/operator/prim_statement.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "pipeline/static_analysis/param_validator.h" -#include "pipeline/static_analysis/prim.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/utils.h" +#include "abstract/param_validator.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "frontend/operator/ops.h" +#include "abstract/utils.h" #include "utils/symbolic.h" namespace mindspore { @@ -95,7 +95,7 @@ AbstractBasePtr InferImplDot(const AnalysisEnginePtr &, const PrimitivePtr &prim return std::make_shared(input_x->element(), std::make_shared(param)); } -AbstractBasePtr InferImplSwitch(const AnalysisEnginePtr &, const PrimitivePtr &, +AbstractBasePtr InferImplSwitch(const AnalysisEnginePtr &, const PrimitivePtr &prim, const AbstractBasePtrList &args_spec_list) { // Inputs: condition, true branch, false branch if (args_spec_list.size() != 3) { @@ -108,6 +108,11 @@ AbstractBasePtr InferImplSwitch(const AnalysisEnginePtr &, const PrimitivePtr &, auto fb = args_spec_list[2]; MS_EXCEPTION_IF_NULL(cond); + auto unroll_flag = prim->GetAttr(prim::SWITCH_UNROLL_FLAG); + if (unroll_flag != nullptr && GetValue(unroll_flag) == 0) { + return tb->Join(fb); + } + ValuePtr v = cond->GetValueTrack(); MS_EXCEPTION_IF_NULL(v); // for tensor as condition, keeps both true and false branch. diff --git a/mindspore/ccsrc/frontend/operator/prim_structures.cc b/mindspore/ccsrc/frontend/operator/prim_structures.cc new file mode 100644 index 00000000000..b602b07a0cc --- /dev/null +++ b/mindspore/ccsrc/frontend/operator/prim_structures.cc @@ -0,0 +1,712 @@ +/** + * This is the C++ adaptation and derivative work of Myia (https://github.com/mila-iqia/myia/). + * + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "pipeline/jit/static_analysis/prim.h" +#include "abstract/utils.h" +#include "abstract/param_validator.h" +#include "frontend/operator/ops.h" +#include "utils/convert_utils.h" +#include "ir/tensor_py.h" + +using mindspore::tensor::TensorPy; + +namespace mindspore { +namespace abstract { + +AbstractBasePtr InferImplStringEqual(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: two scalars whose value is a string. + const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 2); + AbstractScalarPtr scalar_x = CheckArg(op_name, args_spec_list, 0); + AbstractScalarPtr scalar_y = CheckArg(op_name, args_spec_list, 1); + + ValuePtr value_x = scalar_x->BuildValue(); + ValuePtr value_y = scalar_y->BuildValue(); + if (!value_x->isa() || !value_y->isa()) { + MS_LOG(EXCEPTION) << op_name << " requires 2 parameters are string, but got param0: " << value_x->ToString() + << ", param1: " << value_y->ToString(); + } + + bool ret = (value_x->cast()->value() == value_y->cast()->value()); + return std::make_shared(ret); +} + +AbstractBasePtr InferImplStringConcat(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: two scalars whose value is a string. 
+ const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 2); + AbstractScalarPtr scalar_x = CheckArg(op_name, args_spec_list, 0); + AbstractScalarPtr scalar_y = CheckArg(op_name, args_spec_list, 1); + + ValuePtr value_x = scalar_x->BuildValue(); + ValuePtr value_y = scalar_y->BuildValue(); + if (!value_x->isa() || !value_y->isa()) { + MS_LOG(EXCEPTION) << op_name << " requires 2 parameters are string, but got param0: " << value_x->ToString() + << ", param1: " << value_y->ToString(); + } + + std::string ret = (value_x->cast()->value() + value_y->cast()->value()); + return std::make_shared(ret); +} + +AbstractBasePtr InferImplMakeTuple(const AnalysisEnginePtr &, const PrimitivePtr &, + const AbstractBasePtrList &args_spec_list) { + return std::make_shared(args_spec_list); +} + +AbstractBasePtr InferImplMakeList(const AnalysisEnginePtr &, const PrimitivePtr &, + const AbstractBasePtrList &args_spec_list) { + return std::make_shared(args_spec_list); +} + +AbstractBasePtr InferImplMakeDict(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: two tuples. 
+ const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 2); + AbstractTuplePtr keys = CheckArg(op_name, args_spec_list, 0); + AbstractTuplePtr values = CheckArg(op_name, args_spec_list, 1); + + size_t keys_size = keys->size(); + if (values->size() != keys_size) { + MS_LOG(EXCEPTION) << op_name << " evaluator keys' size is not equal with values' size"; + } + + std::vector key_value; + AbstractScalarPtr key; + AbstractBasePtrList key_list = keys->elements(); + AbstractBasePtrList value_list = values->elements(); + for (size_t index = 0; index < keys_size; index++) { + key = CheckArg(op_name + "key", key_list, index); + ValuePtr keyPtr = key->BuildValue(); + MS_EXCEPTION_IF_NULL(keyPtr); + if (!keyPtr->isa()) { + MS_LOG(EXCEPTION) << op_name << " evaluator keys should be string, but got " << keyPtr->ToString(); + } + std::string key_string = GetValue(keyPtr); + key_value.emplace_back(key_string, value_list[index]); + } + return std::make_shared(key_value); +} + +AbstractBasePtr InferImplMakeKwarg(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: a string and an object of a subclass of AbstractBase. + const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 2); + AbstractScalarPtr key = CheckArg(op_name, args_spec_list, 0); + + ValuePtr keyPtr = key->BuildValue(); + if (!keyPtr->isa()) { + MS_LOG(EXCEPTION) << op_name << " evaluator key should be string, but got " << keyPtr->ToString(); + } + std::string key_string = GetValue(keyPtr); + return std::make_shared(key_string, args_spec_list[1]); +} + +AbstractBasePtr InferImplExtractKwarg(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: a string and a keyword. 
+ const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 2); + AbstractScalarPtr key = CheckArg(op_name, args_spec_list, 0); + AbstractKeywordArgPtr kwarg = CheckArg(op_name, args_spec_list, 1); + + ValuePtr key_value = key->BuildValue(); + if (!key_value->isa()) { + MS_LOG(EXCEPTION) << op_name << " evaluator key should be string, but got " << key_value->ToString(); + } + std::string key_input = GetValue(key_value); + std::string key_actual = kwarg->get_key(); + if (key_actual != key_input) { + MS_LOG(EXCEPTION) << op_name << " evaluator input key should be same as AbstractKeywordArg' key, but input is " + << key_input << ", AbstractKeywordArg' key is " << key_actual; + } + return kwarg->get_arg(); +} + +AbstractBasePtr InferImplMakeSlice(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: three scalars whose value is an int32 number. + CheckArgsSize(primitive->name(), args_spec_list, 3); + size_t args_size = args_spec_list.size(); + for (size_t index = 0; index < args_size; index++) { + MS_EXCEPTION_IF_NULL(args_spec_list[index]); + if (!args_spec_list[index]->isa() && !args_spec_list[index]->isa()) { + MS_LOG(EXCEPTION) << "MakeSlice eval " << index << " parameter is neither AbstractScalar nor AbstractNone."; + } + if (args_spec_list[index]->isa() && + !dyn_cast(args_spec_list[index])->BuildValue()->isa()) { + MS_LOG(EXCEPTION) << "MakeSlice eval " << index << " parameter is an AbstractScalar, but is not an int32 number."; + } + } + // Slice: start, end, step + return std::make_shared(args_spec_list[0], args_spec_list[1], args_spec_list[2]); +} + +// Eval the return type of make_record +AbstractBasePtr InferImplMakeRecord(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: at lease two objects of a subclass of AbstractBase. 
+ if (args_spec_list.size() < 2) { + MS_LOG(EXCEPTION) << "Typeof evaluator requires more than 1 parameter, while the input size is " + << args_spec_list.size() << "."; + } + + // args_spec_list[0] maybe AbstractScalarPtr or AbstractTypePtr + MS_EXCEPTION_IF_NULL(args_spec_list[0]); + TypePtr type = args_spec_list[0]->GetTypeTrack(); + MS_EXCEPTION_IF_NULL(type); + if (type->type_id() != kMetaTypeTypeType) { + MS_LOG(EXCEPTION) << "Can not make type(" << type->ToString() << ")not TypeType"; + } + + ValuePtr value_track = args_spec_list[0]->GetValueTrack(); + MS_EXCEPTION_IF_NULL(value_track); + TypePtr type_ptr = value_track->cast(); + if (type_ptr == nullptr) { + MS_LOG(EXCEPTION) << "Value type error, not Me type:" << value_track->ToString(); + } + + auto cls = dyn_cast(type_ptr); + MS_EXCEPTION_IF_NULL(cls); + ClassAttrVector attributes = cls->GetAttributes(); + CheckArgsSize(primitive->name(), args_spec_list, attributes.size() + 1); + + std::vector abs_attributes; + for (size_t i = 0; i < attributes.size(); i++) { + AbstractAttribute elem(attributes[i].first, args_spec_list[i + 1]); + abs_attributes.push_back(elem); + } + + return std::make_shared(cls->tag(), abs_attributes, cls->methods()); +} + +template +AbstractBasePtr InferTupleOrListGetItem(const std::string &op_name, const AbstractBasePtrList &args_spec_list) { + // Inputs: a tuple or list and a scalar whose value is an int32 number. 
+ CheckArgsSize(op_name, args_spec_list, 2); + auto queue = CheckArg(op_name, args_spec_list, 0); + AbstractScalarPtr index = CheckArg(op_name, args_spec_list, 1); + + ValuePtr index_value = index->BuildValue(); + if (!index_value->isa()) { + // when index_value is an AnyValue and args_spec_list[0] is a scalar, try to return the type of the first element + // and continue + if (dyn_cast(queue->elements()[0]) != nullptr) { + return std::make_shared(queue->elements()[0]->BuildType()); + } + MS_EXCEPTION(IndexError) << op_name << " evaluator index should be an int32 number, but got " + << index_value->ToString(); + } + int idx_v = GetValue(index_value); + std::size_t nelems = queue->elements().size(); + if (idx_v >= SizeToInt(nelems) || idx_v < -SizeToInt(nelems)) { + MS_EXCEPTION(IndexError) << op_name << " evaluator index should be in range[-" << SizeToInt(nelems) << ", " + << SizeToInt(nelems) << "), but got " << idx_v << "."; + } + + std::size_t uidx_v = 0; + if (idx_v >= 0) { + uidx_v = IntToSize(idx_v); + } else { + uidx_v = IntToSize(idx_v + SizeToInt(nelems)); + } + return queue->elements()[uidx_v]; +} + +template +AbstractBasePtr InferTupleOrListSetItem(const std::string &op_name, const AbstractBasePtrList &args_spec_list) { + // Inputs: a tuple or list, a scalar whose value is an int32 number and an object of a subclass of AbstractBase. 
+ CheckArgsSize(op_name, args_spec_list, 3); + auto queue = CheckArg(op_name, args_spec_list, 0); + AbstractScalarPtr index = CheckArg(op_name, args_spec_list, 1); + + ValuePtr index_value = index->BuildValue(); + if (!index_value->isa()) { + MS_EXCEPTION(IndexError) << op_name << " evaluator index should be an int32 number, but got " + << index_value->ToString(); + } + int idx_v = GetValue(index_value); + if (idx_v < 0) { + MS_EXCEPTION(IndexError) << "The index of " << typeid(T).name() << " should be positive number, but got " << idx_v + << "."; + } + + size_t uidx_v = IntToSize(idx_v); + AbstractBasePtrList elements = queue->elements(); + std::size_t nelems = elements.size(); + if (uidx_v >= nelems) { + MS_EXCEPTION(IndexError) << op_name << " evaluator the index: " << uidx_v << " to set out of range: " << nelems - 1 + << "."; + } + elements[uidx_v] = args_spec_list[2]; + return std::make_shared(elements); +} + +AbstractBasePtr InferImplTupleGetItem(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + return InferTupleOrListGetItem(primitive->name(), args_spec_list); +} + +AbstractBasePtr InferImplListGetItem(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + return InferTupleOrListGetItem(primitive->name(), args_spec_list); +} + +AbstractBasePtr InferImplTupleSetItem(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + return InferTupleOrListSetItem(primitive->name(), args_spec_list); +} + +AbstractBasePtr InferImplListSetItem(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + return InferTupleOrListSetItem(primitive->name(), args_spec_list); +} + +AbstractBasePtr InferImplDictGetItem(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: a dict and a scalar whose value is a string. 
+ const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 2); + AbstractDictionaryPtr dict = CheckArg(op_name, args_spec_list, 0); + AbstractScalarPtr key = CheckArg(op_name, args_spec_list, 1); + + ValuePtr key_value = key->BuildValue(); + if (!key_value->isa()) { + MS_LOG(EXCEPTION) << op_name << " evaluator key should be string, but got " << key_value->ToString(); + } + auto key_str = GetValue(key_value); + std::vector dict_elems = dict->elements(); + auto it = std::find_if(dict_elems.begin(), dict_elems.end(), + [key_str](const AbstractAttribute &item) { return item.first == key_str; }); + + if (it == dict_elems.end()) { + MS_LOG(EXCEPTION) << "The key " << key_str << " does not exist in the dict:" << args_spec_list[0]->ToString(); + } + return it->second; +} + +AbstractBasePtr InferImplDictSetItem(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: a dict and a scalar whose value is a string and an object of a subclass of AbstractBase. 
+ const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 3); + AbstractDictionaryPtr dict = CheckArg(op_name, args_spec_list, 0); + AbstractScalarPtr key = CheckArg(op_name, args_spec_list, 1); + + ValuePtr key_value = key->BuildValue(); + if (!key_value->isa()) { + MS_LOG(EXCEPTION) << op_name << " evaluator key should be string, but got " << key_value->ToString(); + } + std::string key_str = GetValue(key_value); + std::vector dict_elems = dict->elements(); + auto it = std::find_if(dict_elems.begin(), dict_elems.end(), + [key_str](AbstractAttribute &item) { return item.first == key_str; }); + + MS_EXCEPTION_IF_NULL(args_spec_list[2]); + auto new_ele = std::make_pair(key_str, args_spec_list[2]); + if (it != dict_elems.end()) { + int index = it - dict_elems.begin(); + dict_elems[IntToSize(index)] = new_ele; + } else { + dict_elems.push_back(new_ele); + } + return std::make_shared(dict_elems); +} + +AbstractBasePtr InferImplListAppend(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: a list and an object of a subclass of AbstractBase. + const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 2); + AbstractListPtr list = CheckArg(op_name, args_spec_list, 0); + (void)AbstractJoin(list->elements()); + return list; +} + +template +AbstractBasePtr InferTupleOrListOrDictLen(const std::string &op_name, const AbstractBasePtrList &args_spec_list) { + // Inputs: a tuple or list or dict. 
+ CheckArgsSize(op_name, args_spec_list, 1); + auto arg = CheckArg(op_name, args_spec_list, 0); + return std::make_shared(SizeToInt(arg->size())); +} + +AbstractBasePtr InferImplTupleLen(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + return InferTupleOrListOrDictLen(primitive->name(), args_spec_list); +} + +AbstractBasePtr InferImplListLen(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + return InferTupleOrListOrDictLen(primitive->name(), args_spec_list); +} + +AbstractBasePtr InferImplDictLen(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + return InferTupleOrListOrDictLen(primitive->name(), args_spec_list); +} + +AbstractBasePtr InferImplArrayLen(const AnalysisEnginePtr &, const PrimitivePtr &, + const AbstractBasePtrList &args_spec_list) { + return std::make_shared(kAnyValue, kInt32); +} + +AbstractBasePtr InferImplListMap(const AnalysisEnginePtr &engine, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: fn, list1, list2, ... + MS_EXCEPTION_IF_NULL(engine); + if (args_spec_list.size() <= 1) { + MS_LOG(EXCEPTION) << "List_map requires at least 1 list. while the input size is " << args_spec_list.size() << "."; + } + AbstractFunctionPtr fn = CheckArg(primitive->name(), args_spec_list, 0); + // check args from 1. 
+ CheckArgsSpec(AbstractBasePtrList(args_spec_list.begin() + 1, args_spec_list.end())); + + AbstractBasePtrList subargs; + for (std::size_t i = 1; i < args_spec_list.size(); i++) { + AbstractListPtr l_ptr = dyn_cast(args_spec_list[i]); + if (l_ptr == nullptr) { + MS_LOG(EXCEPTION) << "Argument[" << i << "] of list_map should be a list."; + } + subargs.push_back(AbstractJoin(l_ptr->elements())); + } + EvalResultPtr engin_exc = engine->Execute(fn, subargs); + AbstractBasePtrList result; + for (std::size_t i = 1; i < args_spec_list.size(); i++) { + result.push_back(engin_exc->abstract()); + } + return std::make_shared(result); +} + +AbstractBasePtr InferImplListReduce(const AnalysisEnginePtr &engine, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: a fn, a list and an object of a subclass of a AbstractBase. + MS_EXCEPTION_IF_NULL(engine); + const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 3); + AbstractFunctionPtr fn = CheckArg(op_name, args_spec_list, 0); + AbstractListPtr lst = CheckArg(op_name, args_spec_list, 1); + AbstractBasePtr dflt = args_spec_list[2]; + + AbstractBasePtr list_type = AbstractJoin(lst->elements()); + auto result1 = engine->Execute(fn, lst->elements()); + auto result2 = engine->Execute(fn, {dflt, list_type}); + MS_EXCEPTION_IF_NULL(result1->abstract()); + MS_EXCEPTION_IF_NULL(result2->abstract()); + return result1->abstract()->Join(result2->abstract()); +} + +AbstractBasePtr InferImplTupleReversed(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: a tuple + const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 1); + AbstractTuplePtr input = CheckArg(op_name, args_spec_list, 0); + + auto tuple_elements = input->elements(); + AbstractBasePtrList elem_list; + (void)std::transform(tuple_elements.rbegin(), tuple_elements.rend(), std::back_inserter(elem_list), + [](const 
AbstractBasePtr &elem) { return elem->Clone(); }); + return std::make_shared(elem_list); +} + +AbstractBasePtr DoInferReduceShape(const AbstractTuplePtr &x_shape, const ValuePtr &x_shp_value, + const ValueTuplePtr &axis_value_ptr, const PrimitivePtr &primitive) { + size_t x_rank = x_shape->size(); + std::set axis_set; + auto axis_data = axis_value_ptr->value(); + if (axis_data.empty()) { + int size = 1; + AbstractBasePtrList values(x_rank, std::make_shared(size)); + return std::make_shared(values); + } + + for (auto &elem : axis_data) { + int e_value = CheckAxis(primitive->name(), elem, -SizeToInt(x_rank), SizeToInt(x_rank) - 1); + (void)axis_set.insert(e_value); + } + + auto x_shp_data = x_shp_value->cast()->value(); + if (x_shp_data.size() < x_rank) { + MS_LOG(EXCEPTION) << "x_shape_data.size() " << x_shp_data.size() << " less than x_shape.size() " << x_rank; + } + AbstractBasePtrList values; + for (size_t i = 0; i < x_rank; i++) { + if (axis_set.count(SizeToInt(i)) || axis_set.count(SizeToInt(i) - SizeToInt(x_rank))) { + auto axis_v = MakeValue(1); + values.push_back(std::make_shared(axis_v, axis_v->type())); + } else { + int dim_value = x_shp_data[i]->cast()->value(); + auto dim = MakeValue(dim_value); + values.push_back(std::make_shared(dim, dim->type())); + } + } + + return std::make_shared(values); +} + +AbstractBasePtr InferImplReduceShape(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: x_shape, axis + const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 2); + AbstractTuplePtr shape_x = CheckArg(op_name, args_spec_list, 0); + MS_EXCEPTION_IF_NULL(args_spec_list[1]); + + auto x_shp_value = shape_x->BuildValue(); + if (x_shp_value->isa()) { + MS_LOG(EXCEPTION) << op_name + << " evaluator shape's data field can't be anything: " << args_spec_list[1]->ToString(); + } + + // Axis can be scalar, tuple or None + AbstractTuplePtr axis = nullptr; + if 
(args_spec_list[1]->isa()) { + MS_LOG(DEBUG) << op_name << " evaluator second parameter is scalar"; + AbstractBasePtrList axis_list = {dyn_cast(args_spec_list[1])}; + axis = std::make_shared(axis_list); + } else if (args_spec_list[1]->isa()) { + MS_LOG(DEBUG) << op_name << " evaluator second parameter is tuple"; + axis = args_spec_list[1]->cast(); + } else { + MS_LOG(EXCEPTION) << op_name << " evaluator second parameter should be a scalar or tuple, but got " + << args_spec_list[1]->ToString(); + } + + auto axis_value = axis->BuildValue(); + if (axis_value->isa()) { + MS_LOG(EXCEPTION) << op_name + << " evaluator shape's data field can't be anything: " << args_spec_list[1]->ToString(); + } + auto axis_value_ptr = axis_value->cast(); + MS_EXCEPTION_IF_NULL(axis_value_ptr); + + return DoInferReduceShape(shape_x, x_shp_value, axis_value_ptr, primitive); +} + +AbstractBasePtr InferImplTupleDiv(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: two tuples. 
+ const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 2); + AbstractTuplePtr shape_x = CheckArg(op_name, args_spec_list, 0); + AbstractTuplePtr div_shp = CheckArg(op_name, args_spec_list, 1); + MS_LOG(INFO) << "DivShape input:" << shape_x->ToString() << ", div:" << div_shp->ToString(); + + auto div_shp_value = div_shp->BuildValue(); + if (div_shp_value->isa()) { + MS_LOG(EXCEPTION) << "shape's data field can't be anythin: " << args_spec_list[0]->ToString(); + } + + auto shpx_value = shape_x->BuildValue(); + if (shpx_value->isa()) { + MS_LOG(EXCEPTION) << "shape's data field can't be anythin: " << args_spec_list[1]->ToString(); + } + + if (div_shp->size() != shape_x->size()) { + MS_LOG(EXCEPTION) << "tileshape elems shape must the same div_shp: " << div_shp->size() + << ", shapex: " << shape_x->size() << "."; + } + + auto shpx_data = shpx_value->cast()->value(); + auto div_shp_data = div_shp_value->cast()->value(); + AbstractBasePtrList values; + + for (size_t i = 0; i < div_shp_data.size(); i++) { + if (div_shp_data[i]->cast() == nullptr) { + MS_LOG(EXCEPTION) << "div_shp_shape data should be an int32 number, but it's " << args_spec_list[1]->ToString(); + } + int shapex_value = GetValue(shpx_data[i]); + int div_value = GetValue(div_shp_data[i]); + MS_LOG(DEBUG) << "div_shp_shape data shapex_value :" << shapex_value << " div_value: " << div_value; + if (div_value == 0) { + MS_LOG(EXCEPTION) << "error: division value should not be 0!"; + } + if ((shapex_value % div_value) != 0) { + MS_LOG(EXCEPTION) << "div_shp_shape data shapex must div int:" << shapex_value << " div_value: " << div_value; + } + + int result = shapex_value / div_value; + auto result_v = MakeValue(result); + values.push_back(std::make_shared(result_v, result_v->type())); + } + + return std::make_shared(values); +} + +AbstractBasePtr InferImplTuple2Array(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // 
Inputs: a tuple + const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 1); + AbstractTuplePtr input = CheckArg(op_name, args_spec_list, 0); + + py::tuple data_tuple = ValuePtrToPyData(input->BuildValue()); + py::array data = py::array(data_tuple); + auto tensor = TensorPy::MakeTensor(data); + auto ret = tensor->ToAbstract(); + ret->set_value(tensor); + MS_LOG(DEBUG) << "Tuple2arry result AbstractTensor: " << ret->ToString(); + return ret; +} + +AbstractBasePtr InferImplShapeMul(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: a tuple + // example: tuple = (1, 2, 3), shape_mul(tuple) = 1*2*3 = 6 + const std::string op_name = primitive->name(); + CheckArgsSize(op_name, args_spec_list, 1); + AbstractTuplePtr shape_x = CheckArg(op_name, args_spec_list, 0); + + auto shpx_value = shape_x->BuildValue(); + if (shpx_value->isa()) { + MS_LOG(EXCEPTION) << "shape's data field can't be anythin: " << shape_x->ToString(); + } + + auto shpx_data = shpx_value->cast()->value(); + + int result = 1; + for (size_t i = 0; i < shpx_data.size(); i++) { + int value = GetValue(shpx_data[i]); + result = IntMulWithOverflowCheck(result, value); + } + + auto result_v = MakeValue(result); + MS_LOG(DEBUG) << "shape mul result:" << result_v->ToString(); + return std::make_shared(result_v, result_v->type()); +} + +template +AbstractBasePtr InferImplTupleOrListEqual(const std::string &op_name, const AbstractBasePtrList &args_spec_list) { + // Inputs: two tuples or two lists. 
+ CheckArgsSize(op_name, args_spec_list, 2); + auto input_x = CheckArg(op_name, args_spec_list, 0); + auto input_y = CheckArg(op_name, args_spec_list, 1); + + ValuePtr x_value = input_x->BuildValue(); + ValuePtr y_value = input_y->BuildValue(); + return std::make_shared(*x_value == *y_value); +} + +AbstractBasePtr InferImplTupleEqual(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + return InferImplTupleOrListEqual(primitive->name(), args_spec_list); +} + +AbstractBasePtr InferImplListEqual(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + return InferImplTupleOrListEqual(primitive->name(), args_spec_list); +} + +struct SlideInfo { + int start; + int step; + int stop; +}; + +void CalcSlidePara(const AbstractBasePtrList &args_spec_list, SlideInfo *slide) { + int arg1 = 0; + int arg2 = 0; + if (!args_spec_list.empty()) { + MS_EXCEPTION_IF_NULL(args_spec_list[0]); + auto arg_value = args_spec_list[0]->BuildValue(); + if (!arg_value->isa()) { + MS_LOG(EXCEPTION) << "Only supported input an int32 number."; + } + arg1 = GetValue(arg_value); + } + + if (args_spec_list.size() >= 2) { + MS_EXCEPTION_IF_NULL(args_spec_list[1]); + auto arg_value = args_spec_list[1]->BuildValue(); + if (!arg_value->isa()) { + MS_LOG(EXCEPTION) << "Only supported input an int32 number."; + } + arg2 = GetValue(arg_value); + } + + if (args_spec_list.size() == 3) { + MS_EXCEPTION_IF_NULL(args_spec_list[2]); + auto arg_value = args_spec_list[2]->BuildValue(); + if (!arg_value->isa()) { + MS_LOG(EXCEPTION) << "Only supported input an int32 number."; + } + slide->step = GetValue(arg_value); + slide->start = arg1; + slide->stop = arg2; + } + + if (args_spec_list.size() == 2) { + slide->start = arg1; + slide->stop = arg2; + } + + if (args_spec_list.size() == 1) { + slide->stop = arg1; + } +} + +AbstractBasePtr InferImplMakeRange(const AnalysisEnginePtr &, const PrimitivePtr &, + const 
AbstractBasePtrList &args_spec_list) { + if (args_spec_list.empty()) { + MS_LOG(EXCEPTION) << "Cannot make range from empty input."; + } + + if (args_spec_list.size() > 3) { + MS_LOG(EXCEPTION) << "Error args size of make range operational."; + } + + SlideInfo slide = {0, 1, 0}; + CalcSlidePara(args_spec_list, &slide); + + if (slide.step == 0) { + MS_LOG(EXCEPTION) << "Error, step value is 0."; + } + + AbstractBasePtrList args; + if (slide.start <= slide.stop) { + if (slide.step <= 0) { + MS_LOG(EXCEPTION) << "Error slice[" << slide.start << ", " << slide.stop << ", " << slide.step << "]"; + } + for (int i = slide.start; i < slide.stop; i += slide.step) { + args.push_back(abstract::FromValue(i)); + } + } else { + if (slide.step >= 0) { + MS_LOG(EXCEPTION) << "Error slice[" << slide.start << ", " << slide.stop << ", " << slide.step << "]"; + } + for (int i = slide.start; i > slide.stop; i += slide.step) { + args.push_back(abstract::FromValue(i)); + } + } + + return std::make_shared(args); +} + +AbstractBasePtr InferImplStopGradient(const AnalysisEnginePtr &, const PrimitivePtr &primitive, + const AbstractBasePtrList &args_spec_list) { + // Inputs: a tensor + CheckArgsSize(primitive->name(), args_spec_list, 1); + return args_spec_list[0]->Clone(); +} +} // namespace abstract +} // namespace mindspore diff --git a/mindspore/ccsrc/operator/prim_to_function.cc b/mindspore/ccsrc/frontend/operator/prim_to_function.cc similarity index 98% rename from mindspore/ccsrc/operator/prim_to_function.cc rename to mindspore/ccsrc/frontend/operator/prim_to_function.cc index 733cdbdb73c..7b9592e80e2 100644 --- a/mindspore/ccsrc/operator/prim_to_function.cc +++ b/mindspore/ccsrc/frontend/operator/prim_to_function.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "operator/prim_to_function.h" +#include "frontend/operator/prim_to_function.h" #include #include #include diff --git a/mindspore/ccsrc/operator/prim_to_function.h b/mindspore/ccsrc/frontend/operator/prim_to_function.h similarity index 100% rename from mindspore/ccsrc/operator/prim_to_function.h rename to mindspore/ccsrc/frontend/operator/prim_to_function.h diff --git a/mindspore/ccsrc/optimizer/CMakeLists.txt b/mindspore/ccsrc/frontend/optimizer/CMakeLists.txt similarity index 71% rename from mindspore/ccsrc/optimizer/CMakeLists.txt rename to mindspore/ccsrc/frontend/optimizer/CMakeLists.txt index 44af01735ac..14fda83052f 100644 --- a/mindspore/ccsrc/optimizer/CMakeLists.txt +++ b/mindspore/ccsrc/frontend/optimizer/CMakeLists.txt @@ -1,3 +1,3 @@ file(GLOB_RECURSE _OPTIMIZER_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_OPTIMIZER_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_OPTIMIZER) -add_library(_mindspore_optimizer_obj OBJECT ${_OPTIMIZER_SRC_FILES}) +add_library(_mindspore_frontend_optimizer_obj OBJECT ${_OPTIMIZER_SRC_FILES}) diff --git a/mindspore/ccsrc/optimizer/ad/adjoint.cc b/mindspore/ccsrc/frontend/optimizer/ad/adjoint.cc similarity index 97% rename from mindspore/ccsrc/optimizer/ad/adjoint.cc rename to mindspore/ccsrc/frontend/optimizer/ad/adjoint.cc index ed89aba20e6..60ccf28df46 100644 --- a/mindspore/ccsrc/optimizer/ad/adjoint.cc +++ b/mindspore/ccsrc/frontend/optimizer/ad/adjoint.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include "optimizer/ad/adjoint.h" +#include "frontend/optimizer/ad/adjoint.h" #include #include #include "ir/anf.h" -#include "optimizer/ad/dfunctor.h" +#include "frontend/optimizer/ad/dfunctor.h" namespace mindspore { namespace ad { diff --git a/mindspore/ccsrc/optimizer/ad/adjoint.h b/mindspore/ccsrc/frontend/optimizer/ad/adjoint.h similarity index 97% rename from mindspore/ccsrc/optimizer/ad/adjoint.h rename to mindspore/ccsrc/frontend/optimizer/ad/adjoint.h index b2dae8e66f1..37986e6810b 100644 --- a/mindspore/ccsrc/optimizer/ad/adjoint.h +++ b/mindspore/ccsrc/frontend/optimizer/ad/adjoint.h @@ -22,7 +22,7 @@ #include #include "ir/anf.h" -#include "optimizer/opt.h" +#include "frontend/optimizer/opt.h" namespace mindspore { namespace ad { diff --git a/mindspore/ccsrc/optimizer/ad/dfunctor.cc b/mindspore/ccsrc/frontend/optimizer/ad/dfunctor.cc similarity index 96% rename from mindspore/ccsrc/optimizer/ad/dfunctor.cc rename to mindspore/ccsrc/frontend/optimizer/ad/dfunctor.cc index f9c056a84ef..b314b22f818 100644 --- a/mindspore/ccsrc/optimizer/ad/dfunctor.cc +++ b/mindspore/ccsrc/frontend/optimizer/ad/dfunctor.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "optimizer/ad/dfunctor.h" +#include "frontend/optimizer/ad/dfunctor.h" #include #include @@ -25,12 +25,12 @@ #include "debug/info.h" #include "ir/func_graph_cloner.h" #include "ir/manager.h" -#include "pipeline/resource.h" -#include "pipeline/parse/parse.h" -#include "optimizer/ad/adjoint.h" -#include "optimizer/opt.h" -#include "operator/ops.h" -#include "operator/composite/composite.h" +#include "pipeline/jit/resource.h" +#include "pipeline/jit/parse/parse.h" +#include "frontend/optimizer/ad/adjoint.h" +#include "frontend/optimizer/opt.h" +#include "frontend/operator/ops.h" +#include "frontend/operator/composite/composite.h" #include "utils/symbolic.h" #include "utils/context/ms_context.h" #include "./common.h" @@ -99,14 +99,14 @@ void DFunctor::BackPropagateFv(const AnfNodePtr &fv, const AnfNodePtr &din) { fv_adjoint = anfnode_to_adjoin_indirect_fv_.find(fv); } } - auto key = tape_->NewCNode({NewValueNode(prim::kPrimEmbed), fv_adjoint->second->k()}); - fv_adjoint->second->RegisterKUser(key, 1); + auto node = tape_->NewCNode({NewValueNode(prim::kPrimEmbed), fv_adjoint->second->k()}); + fv_adjoint->second->RegisterKUser(node, 1); auto default_val = tape_->NewCNode({NewValueNode(prim::GetPythonOps("zeros_like")), fv_adjoint->second->k()}); fv_adjoint->second->RegisterKUser(default_val, 1); - auto dfv = tape_->NewCNode({NewValueNode(prim::kPrimEnvGetItem), din, key, default_val}); + auto dfv = tape_->NewCNode({NewValueNode(prim::kPrimEnvGetItem), din, node, default_val}); MS_LOG(DEBUG) << "BackPropagateFv find adjoint in anfnode_to_adjoin_ or anfnode_to_adjoin_indirect_fv_ fv " << fv->func_graph()->ToString() << " " << fv->ToString() << "."; - MS_LOG(DEBUG) << "BackPropagateFv get item from " << din->ToString() << " key " << key->ToString() << "."; + MS_LOG(DEBUG) << "BackPropagateFv get item from " << din->ToString() << " key " << node->ToString() << "."; fv_adjoint->second->AccumulateDout(dfv); } @@ -279,13 +279,13 @@ AnfNodePtr 
DFunctor::AttachFvDoutToTape(const AnfNodePtr &grad_fv) { if (fv_adjoint == anfnode_to_adjoin_.end()) { MS_LOG(EXCEPTION) << "AttachFvDoutToTape fv adjoint does not exist " << fv->ToString() << "."; } - auto key = tape_->NewCNode({NewValueNode(prim::kPrimEmbed), fv_adjoint->second->k()}); - fv_adjoint->second->RegisterKUser(key, 1); + auto node = tape_->NewCNode({NewValueNode(prim::kPrimEmbed), fv_adjoint->second->k()}); + fv_adjoint->second->RegisterKUser(node, 1); auto sens = fv_adjoint->second->dout(); new_grad_fv = tape_->NewCNode({ NewValueNode(prim::kPrimEnvSetItem), new_grad_fv, - key, + node, sens, }); fv_adjoint->second->RegisterDoutUser(new_grad_fv->cast(), 3); @@ -301,13 +301,13 @@ AnfNodePtr DFunctor::AttachIndirectFvDoutToTape(const AnfNodePtr &grad_fv) { for (auto &fv_adjoint : anfnode_to_adjoin_indirect_fv_) { MS_LOG(DEBUG) << "AttachIndirectFvDoutToTape backprop indirect fv " << fv_adjoint.first->ToString() << " " << primal_graph_->ToString() << "."; - auto key = tape_->NewCNode({NewValueNode(prim::kPrimEmbed), fv_adjoint.second->k()}); - fv_adjoint.second->RegisterKUser(key, 1); + auto node = tape_->NewCNode({NewValueNode(prim::kPrimEmbed), fv_adjoint.second->k()}); + fv_adjoint.second->RegisterKUser(node, 1); auto sens = fv_adjoint.second->dout(); new_grad_fv = tape_->NewCNode({ NewValueNode(prim::kPrimEnvSetItem), new_grad_fv, - key, + node, sens, }); fv_adjoint.second->RegisterDoutUser(new_grad_fv->cast(), 3); diff --git a/mindspore/ccsrc/optimizer/ad/dfunctor.h b/mindspore/ccsrc/frontend/optimizer/ad/dfunctor.h similarity index 93% rename from mindspore/ccsrc/optimizer/ad/dfunctor.h rename to mindspore/ccsrc/frontend/optimizer/ad/dfunctor.h index 4fa9cf6bb53..9ee93334e89 100644 --- a/mindspore/ccsrc/optimizer/ad/dfunctor.h +++ b/mindspore/ccsrc/frontend/optimizer/ad/dfunctor.h @@ -28,36 +28,18 @@ #include "ir/anf.h" #include "ir/meta_func_graph.h" #include "ir/func_graph_cloner.h" -#include "pipeline/resource.h" -#include 
"optimizer/ad/adjoint.h" -#include "operator/ops.h" +#include "pipeline/jit/resource.h" +#include "frontend/optimizer/ad/adjoint.h" +#include "frontend/operator/ops.h" #include "debug/trace.h" namespace mindspore { namespace ad { struct PrimitiveTotalEqual { bool operator()(PrimitivePtr const &t1, PrimitivePtr const &t2) const { - if (t1->name() != t2->name()) { - return false; - } - - auto const &attrs1 = t1->attrs(); - auto const &attrs2 = t2->attrs(); - if (attrs1.size() != attrs2.size()) { - return false; - } - - for (auto &attr1 : attrs1) { - if (!t2->HasAttr(attr1.first)) { - return false; - } - - if (!(*(attr1.second) == *(t2->GetAttr(attr1.first)))) { - return false; - } - } - - return true; + MS_EXCEPTION_IF_NULL(t1); + MS_EXCEPTION_IF_NULL(t2); + return *t1 == *t2; } }; diff --git a/mindspore/ccsrc/optimizer/ad/grad.cc b/mindspore/ccsrc/frontend/optimizer/ad/grad.cc similarity index 96% rename from mindspore/ccsrc/optimizer/ad/grad.cc rename to mindspore/ccsrc/frontend/optimizer/ad/grad.cc index d141dc6eeae..ef2d7d400a4 100644 --- a/mindspore/ccsrc/optimizer/ad/grad.cc +++ b/mindspore/ccsrc/frontend/optimizer/ad/grad.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "optimizer/ad/grad.h" -#include "optimizer/ad/dfunctor.h" +#include "frontend/optimizer/ad/grad.h" +#include "frontend/optimizer/ad/dfunctor.h" #include "ir/func_graph_cloner.h" #include "utils/context/ms_context.h" #include "utils/symbolic.h" diff --git a/mindspore/ccsrc/optimizer/ad/grad.h b/mindspore/ccsrc/frontend/optimizer/ad/grad.h similarity index 97% rename from mindspore/ccsrc/optimizer/ad/grad.h rename to mindspore/ccsrc/frontend/optimizer/ad/grad.h index a878aa9df73..ee9ab79ffbc 100644 --- a/mindspore/ccsrc/optimizer/ad/grad.h +++ b/mindspore/ccsrc/frontend/optimizer/ad/grad.h @@ -22,7 +22,7 @@ #include "ir/anf.h" #include "ir/meta_func_graph.h" -#include "pipeline/resource.h" +#include "pipeline/jit/resource.h" namespace mindspore { namespace ad { diff --git a/mindspore/ccsrc/optimizer/ad/kprim.cc b/mindspore/ccsrc/frontend/optimizer/ad/kprim.cc similarity index 96% rename from mindspore/ccsrc/optimizer/ad/kprim.cc rename to mindspore/ccsrc/frontend/optimizer/ad/kprim.cc index 4141fb5413b..5ca2ca6c43d 100644 --- a/mindspore/ccsrc/optimizer/ad/kprim.cc +++ b/mindspore/ccsrc/frontend/optimizer/ad/kprim.cc @@ -20,16 +20,16 @@ #include #include #include "ir/anf.h" -#include "ir/primitive.h" +#include "ir/primitive_py.h" #include "ir/meta_func_graph.h" #include "ir/func_graph_cloner.h" #include "ir/manager.h" -#include "pipeline/resource.h" -#include "pipeline/parse/parse.h" -#include "optimizer/ad/dfunctor.h" -#include "optimizer/opt.h" -#include "operator/ops.h" -#include "operator/composite/composite.h" +#include "pipeline/jit/resource.h" +#include "pipeline/jit/parse/parse.h" +#include "frontend/optimizer/ad/dfunctor.h" +#include "frontend/optimizer/opt.h" +#include "frontend/operator/ops.h" +#include "frontend/operator/composite/composite.h" #include "utils/symbolic.h" #include "utils/primitive_utils.h" #include "utils/context/ms_context.h" @@ -232,10 +232,7 @@ FuncGraphPtr KPrim::BpropCut(const ValueNodePtr &value_node, const pipeline::Res 
std::vector outputs; auto bprop_cut = std::make_shared("bprop_cut", py::object()); - if (!prim->is_base()) { - PrimitivePyPtr prim_py = dyn_cast(prim); - bprop_cut->set_hook(prim_py->hook()); - } + bprop_cut->CopyHookFunction(prim); auto cell_id = GetValue(prim->GetAttr("cell_id")); if (cell_id != "") { diff --git a/mindspore/ccsrc/optimizer/clean.cc b/mindspore/ccsrc/frontend/optimizer/clean.cc similarity index 96% rename from mindspore/ccsrc/optimizer/clean.cc rename to mindspore/ccsrc/frontend/optimizer/clean.cc index bb522735681..e35760ceaf3 100644 --- a/mindspore/ccsrc/optimizer/clean.cc +++ b/mindspore/ccsrc/frontend/optimizer/clean.cc @@ -16,7 +16,7 @@ * limitations under the License. */ -#include "optimizer/clean.h" +#include "frontend/optimizer/clean.h" #include #include #include @@ -24,7 +24,7 @@ #include #include "./common.h" #include "debug/trace.h" -#include "operator/composite/composite.h" +#include "frontend/operator/composite/composite.h" namespace mindspore { /* namespace to support opt */ @@ -43,26 +43,28 @@ static AbstractBasePtr Reabs(const AbstractBasePtr &t) { return nullptr; } - AbstractBasePtr res = t; if (t->isa()) { auto abs_class = dyn_cast(t); AbstractBasePtrList baselist; auto attributes = abs_class->attributes(); (void)std::transform(attributes.begin(), attributes.end(), std::back_inserter(baselist), [](const AbstractAttribute &item) { return item.second; }); - res = std::make_shared(baselist); - } else if (t->isa()) { + return std::make_shared(baselist); + } + if (t->isa()) { auto abs_dict = dyn_cast(t); AbstractBasePtrList baselist; auto elements = abs_dict->elements(); (void)std::transform(elements.begin(), elements.end(), std::back_inserter(baselist), [](const AbstractAttribute &item) { return item.second; }); - res = std::make_shared(baselist); - } else if (t->isa()) { - auto abs_dict = dyn_cast(t); - res = std::make_shared(abs_dict->elements()); + return std::make_shared(baselist); } - return res; + if (t->isa()) { + auto 
abs_list = dyn_cast(t); + return std::make_shared(abs_list->elements()); + } + + return nullptr; } AnfNodePtr ConvertGetAttrToTupleGetItem(const CNodePtr &node) { @@ -376,7 +378,12 @@ bool SimplifyDataStructures(const FuncGraphPtr &root, const FuncGraphManagerPtr for (auto &node : manager->all_nodes()) { auto ret = Reabs(node->abstract()); - node->set_abstract(ret); + if (ret) { + MS_LOG(DEBUG) << "Replace " << node->DebugString() << "'s abstract " << node->abstract()->ToString() << " with " + << ret->ToString(); + node->set_abstract(ret); + changed = true; + } } return changed; } diff --git a/mindspore/ccsrc/optimizer/clean.h b/mindspore/ccsrc/frontend/optimizer/clean.h similarity index 94% rename from mindspore/ccsrc/optimizer/clean.h rename to mindspore/ccsrc/frontend/optimizer/clean.h index 0130ecfb32a..54faabaa636 100644 --- a/mindspore/ccsrc/optimizer/clean.h +++ b/mindspore/ccsrc/frontend/optimizer/clean.h @@ -21,10 +21,10 @@ #include #include "ir/anf.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "utils/any.h" #include "ir/manager.h" -#include "pipeline/static_analysis/dshape.h" +#include "abstract/dshape.h" namespace mindspore { /* namespace to support opt */ diff --git a/mindspore/ccsrc/optimizer/control_depend.cc b/mindspore/ccsrc/frontend/optimizer/control_depend.cc similarity index 98% rename from mindspore/ccsrc/optimizer/control_depend.cc rename to mindspore/ccsrc/frontend/optimizer/control_depend.cc index 0b5c85b1e01..8cc9bdb7f4f 100644 --- a/mindspore/ccsrc/optimizer/control_depend.cc +++ b/mindspore/ccsrc/frontend/optimizer/control_depend.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "optimizer/control_depend.h" +#include "frontend/optimizer/control_depend.h" #include #include @@ -22,7 +22,7 @@ #include #include -#include "optimizer/optimizer.h" +#include "frontend/optimizer/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/control_depend.h b/mindspore/ccsrc/frontend/optimizer/control_depend.h similarity index 100% rename from mindspore/ccsrc/optimizer/control_depend.h rename to mindspore/ccsrc/frontend/optimizer/control_depend.h diff --git a/mindspore/ccsrc/optimizer/cse.cc b/mindspore/ccsrc/frontend/optimizer/cse.cc similarity index 99% rename from mindspore/ccsrc/optimizer/cse.cc rename to mindspore/ccsrc/frontend/optimizer/cse.cc index 0b675cca721..4d968d6d742 100644 --- a/mindspore/ccsrc/optimizer/cse.cc +++ b/mindspore/ccsrc/frontend/optimizer/cse.cc @@ -16,7 +16,7 @@ * limitations under the License. */ -#include "optimizer/cse.h" +#include "frontend/optimizer/cse.h" #include #include #include diff --git a/mindspore/ccsrc/optimizer/cse.h b/mindspore/ccsrc/frontend/optimizer/cse.h similarity index 97% rename from mindspore/ccsrc/optimizer/cse.h rename to mindspore/ccsrc/frontend/optimizer/cse.h index 57163cc5c9d..140f5927154 100644 --- a/mindspore/ccsrc/optimizer/cse.h +++ b/mindspore/ccsrc/frontend/optimizer/cse.h @@ -24,7 +24,7 @@ #include #include "ir/anf.h" #include "ir/manager.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/optimizer.h" namespace mindspore { /* namespace to support opt */ diff --git a/mindspore/ccsrc/optimizer/graph_kernel_reuse.cc b/mindspore/ccsrc/frontend/optimizer/graph_kernel_reuse.cc similarity index 98% rename from mindspore/ccsrc/optimizer/graph_kernel_reuse.cc rename to mindspore/ccsrc/frontend/optimizer/graph_kernel_reuse.cc index dc20ad925ec..c157777040e 100644 --- a/mindspore/ccsrc/optimizer/graph_kernel_reuse.cc +++ b/mindspore/ccsrc/frontend/optimizer/graph_kernel_reuse.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "optimizer/graph_kernel_reuse.h" +#include "frontend/optimizer/graph_kernel_reuse.h" #include #include #include diff --git a/mindspore/ccsrc/optimizer/graph_kernel_reuse.h b/mindspore/ccsrc/frontend/optimizer/graph_kernel_reuse.h similarity index 93% rename from mindspore/ccsrc/optimizer/graph_kernel_reuse.h rename to mindspore/ccsrc/frontend/optimizer/graph_kernel_reuse.h index ed5cc93d184..a79ef3ce6d7 100644 --- a/mindspore/ccsrc/optimizer/graph_kernel_reuse.h +++ b/mindspore/ccsrc/frontend/optimizer/graph_kernel_reuse.h @@ -17,12 +17,11 @@ #ifndef MINDSPORE_CCSRC_OPTIMIZER_GRAPH_KERNEL_OP_REUSE_H #define MINDSPORE_CCSRC_OPTIMIZER_GRAPH_KERNEL_OP_REUSE_H -#include #include #include #include - -#include "optimizer/optimizer.h" +#include "mindspore/ccsrc/backend/session/anf_runtime_algorithm.h" +#include "frontend/optimizer/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass.cc b/mindspore/ccsrc/frontend/optimizer/irpass.cc similarity index 85% rename from mindspore/ccsrc/optimizer/irpass.cc rename to mindspore/ccsrc/frontend/optimizer/irpass.cc index 166151751ff..efc3795a4cc 100644 --- a/mindspore/ccsrc/optimizer/irpass.cc +++ b/mindspore/ccsrc/frontend/optimizer/irpass.cc @@ -16,33 +16,33 @@ #include -#include "optimizer/irpass.h" -#include "optimizer/irpass/arithmetic_simplify.h" -#include "optimizer/irpass/branch_culling.h" -#include "optimizer/irpass/cast_eliminate.h" -#include "optimizer/irpass/convert.h" -#include "optimizer/irpass/env_item_eliminate.h" -#include "optimizer/irpass/grad_var_prepare.h" -#include "optimizer/irpass/gradient_eliminate.h" -#include "optimizer/irpass/inline.h" -#include "optimizer/irpass/incorporate_call.h" -#include "optimizer/irpass/incorporate_getitem.h" -#include "optimizer/irpass/item_tuple_eliminate.h" -#include "optimizer/irpass/mark_interface_fusion.h" -#include "optimizer/irpass/merge_addn.h" -#include "optimizer/irpass/minmax_grad.h" -#include 
"optimizer/irpass/param_replace.h" -#include "optimizer/irpass/partial_eliminate.h" -#include "optimizer/irpass/reduce_eliminate.h" -#include "optimizer/irpass/ref_eliminate.h" -#include "optimizer/irpass/reshape_eliminate.h" -#include "optimizer/irpass/special_op_eliminate.h" -#include "optimizer/irpass/specialize_transform.h" -#include "optimizer/irpass/symbol_resolver.h" -#include "optimizer/irpass/tile_eliminate.h" -#include "optimizer/irpass/transpose_eliminate.h" -#include "optimizer/opt.h" -#include "optimizer/irpass/indexed_slices_eliminate.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/irpass/arithmetic_simplify.h" +#include "frontend/optimizer/irpass/branch_culling.h" +#include "frontend/optimizer/irpass/cast_eliminate.h" +#include "frontend/optimizer/irpass/convert.h" +#include "frontend/optimizer/irpass/env_item_eliminate.h" +#include "frontend/optimizer/irpass/grad_var_prepare.h" +#include "frontend/optimizer/irpass/gradient_eliminate.h" +#include "frontend/optimizer/irpass/inline.h" +#include "frontend/optimizer/irpass/incorporate_call.h" +#include "frontend/optimizer/irpass/incorporate_getitem.h" +#include "frontend/optimizer/irpass/item_tuple_eliminate.h" +#include "frontend/optimizer/irpass/mark_interface_fusion.h" +#include "frontend/optimizer/irpass/merge_addn.h" +#include "frontend/optimizer/irpass/minmax_grad.h" +#include "frontend/optimizer/irpass/param_replace.h" +#include "frontend/optimizer/irpass/partial_eliminate.h" +#include "frontend/optimizer/irpass/reduce_eliminate.h" +#include "frontend/optimizer/irpass/ref_eliminate.h" +#include "frontend/optimizer/irpass/reshape_eliminate.h" +#include "frontend/optimizer/irpass/special_op_eliminate.h" +#include "frontend/optimizer/irpass/specialize_transform.h" +#include "frontend/optimizer/irpass/symbol_resolver.h" +#include "frontend/optimizer/irpass/tile_eliminate.h" +#include "frontend/optimizer/irpass/transpose_eliminate.h" +#include "frontend/optimizer/opt.h" 
+#include "frontend/optimizer/irpass/indexed_slices_eliminate.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass.h b/mindspore/ccsrc/frontend/optimizer/irpass.h similarity index 98% rename from mindspore/ccsrc/optimizer/irpass.h rename to mindspore/ccsrc/frontend/optimizer/irpass.h index 782eae61240..4af8c0789dc 100644 --- a/mindspore/ccsrc/optimizer/irpass.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass.h @@ -19,8 +19,8 @@ #include -#include "optimizer/optimizer.h" -#include "optimizer/opt.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/optimizer/opt.h" #include "ir/visitor.h" namespace mindspore { diff --git a/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.cc b/mindspore/ccsrc/frontend/optimizer/irpass/arithmetic_simplify.cc similarity index 98% rename from mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.cc rename to mindspore/ccsrc/frontend/optimizer/irpass/arithmetic_simplify.cc index b111a6b67aa..83f7fae5826 100644 --- a/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.cc +++ b/mindspore/ccsrc/frontend/optimizer/irpass/arithmetic_simplify.cc @@ -19,13 +19,13 @@ #include #include -#include "optimizer/irpass/arithmetic_simplify.h" +#include "frontend/optimizer/irpass/arithmetic_simplify.h" #include "ir/optimizer_caller.h" #include "ir/visitor.h" -#include "operator/ops.h" -#include "optimizer/irpass.h" -#include "optimizer/irpass/prim_eliminate.h" -#include "optimizer/optimizer.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/irpass/prim_eliminate.h" +#include "frontend/optimizer/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h b/mindspore/ccsrc/frontend/optimizer/irpass/arithmetic_simplify.h similarity index 98% rename from mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h rename to mindspore/ccsrc/frontend/optimizer/irpass/arithmetic_simplify.h index 
f4bdb0d6559..3088231396d 100644 --- a/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/arithmetic_simplify.h @@ -23,10 +23,10 @@ #include "ir/optimizer_caller.h" #include "ir/visitor.h" -#include "operator/ops.h" -#include "optimizer/irpass.h" -#include "optimizer/irpass/prim_eliminate.h" -#include "optimizer/optimizer.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/irpass/prim_eliminate.h" +#include "frontend/optimizer/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/branch_culling.cc b/mindspore/ccsrc/frontend/optimizer/irpass/branch_culling.cc similarity index 99% rename from mindspore/ccsrc/optimizer/irpass/branch_culling.cc rename to mindspore/ccsrc/frontend/optimizer/irpass/branch_culling.cc index 726f4a28b01..dc580f6b633 100644 --- a/mindspore/ccsrc/optimizer/irpass/branch_culling.cc +++ b/mindspore/ccsrc/frontend/optimizer/irpass/branch_culling.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "optimizer/irpass/branch_culling.h" +#include "frontend/optimizer/irpass/branch_culling.h" #include #include @@ -22,7 +22,7 @@ #include "ir/func_graph.h" #include "ir/func_graph_cloner.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/branch_culling.h b/mindspore/ccsrc/frontend/optimizer/irpass/branch_culling.h similarity index 98% rename from mindspore/ccsrc/optimizer/irpass/branch_culling.h rename to mindspore/ccsrc/frontend/optimizer/irpass/branch_culling.h index 2b5b30bdbfd..b3f3fe4733a 100644 --- a/mindspore/ccsrc/optimizer/irpass/branch_culling.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/branch_culling.h @@ -24,8 +24,8 @@ #include "ir/func_graph_cloner.h" #include "ir/optimizer_caller.h" #include "ir/pattern_matcher.h" -#include "operator/ops.h" -#include "optimizer/irpass.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/irpass.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/cast_eliminate.cc b/mindspore/ccsrc/frontend/optimizer/irpass/cast_eliminate.cc similarity index 90% rename from mindspore/ccsrc/optimizer/irpass/cast_eliminate.cc rename to mindspore/ccsrc/frontend/optimizer/irpass/cast_eliminate.cc index a497f3d5bd9..ddb84806e1d 100644 --- a/mindspore/ccsrc/optimizer/irpass/cast_eliminate.cc +++ b/mindspore/ccsrc/frontend/optimizer/irpass/cast_eliminate.cc @@ -14,14 +14,14 @@ * limitations under the License. 
*/ -#include "optimizer/irpass/cast_eliminate.h" -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/irpass/cast_eliminate.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "ir/func_graph.h" -#include "pipeline/parse/data_converter.h" -#include "pipeline/parse/python_adapter.h" +#include "pipeline/jit/parse/data_converter.h" +#include "pipeline/jit/parse/python_adapter.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/cast_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/cast_eliminate.h similarity index 96% rename from mindspore/ccsrc/optimizer/irpass/cast_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/cast_eliminate.h index d98d0b677b3..d5222d43103 100644 --- a/mindspore/ccsrc/optimizer/irpass/cast_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/cast_eliminate.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_OPTIMIZER_IRPASS_CAST_ELIMINATE_H_ #include "ir/visitor.h" -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/convert.h b/mindspore/ccsrc/frontend/optimizer/irpass/convert.h similarity index 94% rename from mindspore/ccsrc/optimizer/irpass/convert.h rename to mindspore/ccsrc/frontend/optimizer/irpass/convert.h index 3049bafb1ee..d887874203b 100644 --- a/mindspore/ccsrc/optimizer/irpass/convert.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/convert.h @@ -19,11 +19,11 @@ #include -#include "optimizer/optimizer.h" -#include "optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" #include "ir/visitor.h" #include "ir/func_graph.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" 
namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/env_item_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/env_item_eliminate.h similarity index 99% rename from mindspore/ccsrc/optimizer/irpass/env_item_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/env_item_eliminate.h index 3f100dcaec3..14fd8743ff0 100644 --- a/mindspore/ccsrc/optimizer/irpass/env_item_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/env_item_eliminate.h @@ -27,9 +27,9 @@ #include "ir/func_graph_cloner.h" #include "ir/optimizer_caller.h" #include "ir/visitor.h" -#include "operator/ops.h" -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "utils/symbolic.h" namespace mindspore { diff --git a/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.cc b/mindspore/ccsrc/frontend/optimizer/irpass/grad_var_prepare.cc similarity index 95% rename from mindspore/ccsrc/optimizer/irpass/grad_var_prepare.cc rename to mindspore/ccsrc/frontend/optimizer/irpass/grad_var_prepare.cc index 317d67e7926..44c1b62fa5b 100644 --- a/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.cc +++ b/mindspore/ccsrc/frontend/optimizer/irpass/grad_var_prepare.cc @@ -14,16 +14,16 @@ * limitations under the License. 
*/ -#include "optimizer/irpass/grad_var_prepare.h" +#include "frontend/optimizer/irpass/grad_var_prepare.h" #include #include #include #include -#include "operator/composite/composite.h" -#include "operator/ops.h" -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/operator/composite/composite.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" #include "ir/func_graph.h" #include "ir/func_graph_cloner.h" diff --git a/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.h b/mindspore/ccsrc/frontend/optimizer/irpass/grad_var_prepare.h similarity index 90% rename from mindspore/ccsrc/optimizer/irpass/grad_var_prepare.h rename to mindspore/ccsrc/frontend/optimizer/irpass/grad_var_prepare.h index 9713017d122..f6992a87c6b 100644 --- a/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/grad_var_prepare.h @@ -22,10 +22,10 @@ #include #include -#include "operator/composite/composite.h" -#include "operator/ops.h" -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/operator/composite/composite.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" #include "ir/func_graph.h" #include "ir/func_graph_cloner.h" diff --git a/mindspore/ccsrc/optimizer/irpass/gradient_eliminate.cc b/mindspore/ccsrc/frontend/optimizer/irpass/gradient_eliminate.cc similarity index 97% rename from mindspore/ccsrc/optimizer/irpass/gradient_eliminate.cc rename to mindspore/ccsrc/frontend/optimizer/irpass/gradient_eliminate.cc index 3347fa9dc0f..0d98cffa371 100644 --- a/mindspore/ccsrc/optimizer/irpass/gradient_eliminate.cc +++ b/mindspore/ccsrc/frontend/optimizer/irpass/gradient_eliminate.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "optimizer/irpass/gradient_eliminate.h" +#include "frontend/optimizer/irpass/gradient_eliminate.h" #include diff --git a/mindspore/ccsrc/optimizer/irpass/gradient_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/gradient_eliminate.h similarity index 91% rename from mindspore/ccsrc/optimizer/irpass/gradient_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/gradient_eliminate.h index 671d9bde496..82312d9e373 100644 --- a/mindspore/ccsrc/optimizer/irpass/gradient_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/gradient_eliminate.h @@ -21,12 +21,12 @@ #include #include -#include "optimizer/optimizer.h" -#include "optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" #include "ir/visitor.h" #include "common/utils.h" -#include "operator/ops.h" -#include "optimizer/ad/grad.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/ad/grad.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/incorporate_call.h b/mindspore/ccsrc/frontend/optimizer/irpass/incorporate_call.h similarity index 97% rename from mindspore/ccsrc/optimizer/irpass/incorporate_call.h rename to mindspore/ccsrc/frontend/optimizer/irpass/incorporate_call.h index 5842b7bfd69..2f6404458fb 100644 --- a/mindspore/ccsrc/optimizer/irpass/incorporate_call.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/incorporate_call.h @@ -22,12 +22,12 @@ #include #include -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" #include "ir/func_graph.h" #include "ir/func_graph_cloner.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/incorporate_getitem.h b/mindspore/ccsrc/frontend/optimizer/irpass/incorporate_getitem.h similarity index 99% rename from 
mindspore/ccsrc/optimizer/irpass/incorporate_getitem.h rename to mindspore/ccsrc/frontend/optimizer/irpass/incorporate_getitem.h index b6c8fb0e18e..828e205e4f8 100644 --- a/mindspore/ccsrc/optimizer/irpass/incorporate_getitem.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/incorporate_getitem.h @@ -27,9 +27,9 @@ #include "ir/func_graph_cloner.h" #include "ir/optimizer_caller.h" #include "ir/visitor.h" -#include "operator/ops.h" -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/indexed_slices_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/indexed_slices_eliminate.h similarity index 94% rename from mindspore/ccsrc/optimizer/irpass/indexed_slices_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/indexed_slices_eliminate.h index 630d567549f..dfe345fe01f 100644 --- a/mindspore/ccsrc/optimizer/irpass/indexed_slices_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/indexed_slices_eliminate.h @@ -20,10 +20,10 @@ #include #include -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/inline.h b/mindspore/ccsrc/frontend/optimizer/irpass/inline.h similarity index 95% rename from mindspore/ccsrc/optimizer/irpass/inline.h rename to mindspore/ccsrc/frontend/optimizer/irpass/inline.h index 64f192347cd..8cafb268b46 100644 --- a/mindspore/ccsrc/optimizer/irpass/inline.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/inline.h @@ -21,12 +21,12 @@ #include #include -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" 
+#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" #include "ir/func_graph.h" #include "ir/func_graph_cloner.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { @@ -39,7 +39,7 @@ class ReplaceApplicator : public AnfVisitor { } auto fg = GetValueNode(node); - if (fg->has_flag(FUNC_GRAPH_FLAG_DEFER_INLINE)) { + if (fg->has_flag(FUNC_GRAPH_FLAG_DEFER_INLINE) || fg->stub()) { return nullptr; } @@ -110,7 +110,7 @@ class InlinerBase : public AnfVisitor { // G auto fg = GetValueNode(inputs[0]); - if (fg->has_flag(FUNC_GRAPH_FLAG_DEFER_INLINE)) { + if (fg->has_flag(FUNC_GRAPH_FLAG_DEFER_INLINE) || fg->stub()) { return nullptr; } // Do not inline GraphKernel to Cell. diff --git a/mindspore/ccsrc/optimizer/irpass/item_tuple_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/item_tuple_eliminate.h similarity index 98% rename from mindspore/ccsrc/optimizer/irpass/item_tuple_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/item_tuple_eliminate.h index 202951a2541..acd6844ee74 100644 --- a/mindspore/ccsrc/optimizer/irpass/item_tuple_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/item_tuple_eliminate.h @@ -23,9 +23,9 @@ #include "ir/optimizer_caller.h" #include "ir/visitor.h" -#include "operator/ops.h" -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/mark_interface_fusion.h b/mindspore/ccsrc/frontend/optimizer/irpass/mark_interface_fusion.h similarity index 92% rename from mindspore/ccsrc/optimizer/irpass/mark_interface_fusion.h rename to mindspore/ccsrc/frontend/optimizer/irpass/mark_interface_fusion.h index 6f2bcc187fb..8d3839bd9e1 100644 --- a/mindspore/ccsrc/optimizer/irpass/mark_interface_fusion.h +++ 
b/mindspore/ccsrc/frontend/optimizer/irpass/mark_interface_fusion.h @@ -21,13 +21,13 @@ #include #include -#include "session/anf_runtime_algorithm.h" -#include "optimizer/optimizer.h" -#include "optimizer/irpass.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" #include "ir/visitor.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "utils/graph_utils.h" -#include "operator/composite/composite.h" +#include "frontend/operator/composite/composite.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/merge_addn.h b/mindspore/ccsrc/frontend/optimizer/irpass/merge_addn.h similarity index 98% rename from mindspore/ccsrc/optimizer/irpass/merge_addn.h rename to mindspore/ccsrc/frontend/optimizer/irpass/merge_addn.h index e1e4b8878bc..a3cf6e22317 100644 --- a/mindspore/ccsrc/optimizer/irpass/merge_addn.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/merge_addn.h @@ -21,10 +21,10 @@ #include #include -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/minmax_grad.h b/mindspore/ccsrc/frontend/optimizer/irpass/minmax_grad.h similarity index 96% rename from mindspore/ccsrc/optimizer/irpass/minmax_grad.h rename to mindspore/ccsrc/frontend/optimizer/irpass/minmax_grad.h index a426a9fb9bf..658a287234e 100644 --- a/mindspore/ccsrc/optimizer/irpass/minmax_grad.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/minmax_grad.h @@ -20,10 +20,10 @@ #include #include -#include "optimizer/optimizer.h" -#include "optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" #include "ir/visitor.h" -#include "operator/ops.h" +#include 
"frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/param_replace.h b/mindspore/ccsrc/frontend/optimizer/irpass/param_replace.h similarity index 92% rename from mindspore/ccsrc/optimizer/irpass/param_replace.h rename to mindspore/ccsrc/frontend/optimizer/irpass/param_replace.h index c0c4c832d7a..999376e528f 100644 --- a/mindspore/ccsrc/optimizer/irpass/param_replace.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/param_replace.h @@ -19,11 +19,11 @@ #include -#include "optimizer/optimizer.h" -#include "optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" #include "ir/visitor.h" -#include "operator/ops.h" -#include "pipeline/parse/parse.h" +#include "frontend/operator/ops.h" +#include "pipeline/jit/parse/parse.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/partial_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/partial_eliminate.h similarity index 95% rename from mindspore/ccsrc/optimizer/irpass/partial_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/partial_eliminate.h index bc8ef9d8f3a..32fc5abc7d0 100644 --- a/mindspore/ccsrc/optimizer/irpass/partial_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/partial_eliminate.h @@ -21,10 +21,10 @@ #include #include -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/prim_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/prim_eliminate.h similarity index 94% rename from mindspore/ccsrc/optimizer/irpass/prim_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/prim_eliminate.h index 725c30a6b9d..d8c96825c9a 100644 --- 
a/mindspore/ccsrc/optimizer/irpass/prim_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/prim_eliminate.h @@ -17,8 +17,8 @@ #ifndef MINDSPORE_CCSRC_OPTIMIZER_IRPASS_PRIM_ELIMINATE_H_ #define MINDSPORE_CCSRC_OPTIMIZER_IRPASS_PRIM_ELIMINATE_H_ -#include "optimizer/optimizer.h" -#include "optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" #include "ir/visitor.h" namespace mindspore { diff --git a/mindspore/ccsrc/optimizer/irpass/reduce_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/reduce_eliminate.h similarity index 96% rename from mindspore/ccsrc/optimizer/irpass/reduce_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/reduce_eliminate.h index d2e1d15f913..78b7d3f4f1c 100644 --- a/mindspore/ccsrc/optimizer/irpass/reduce_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/reduce_eliminate.h @@ -21,11 +21,11 @@ #include #include -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/dshape.h" +#include "frontend/operator/ops.h" +#include "abstract/dshape.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/frontend/optimizer/irpass/ref_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/ref_eliminate.h new file mode 100644 index 00000000000..86eb4e761d2 --- /dev/null +++ b/mindspore/ccsrc/frontend/optimizer/irpass/ref_eliminate.h @@ -0,0 +1,94 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_OPTIMIZER_IRPASS_REF_ELIMINATE_H_ +#define MINDSPORE_CCSRC_OPTIMIZER_IRPASS_REF_ELIMINATE_H_ + +#include + +#include "ir/pattern_matcher.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" + +namespace mindspore { +namespace opt { +namespace irpass { +// {prim::kPrimMakeRef, X, Y, Z} -> Y +class MakeRefEliminater : public OptimizerCaller { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + PatternNode x, y, z; + MATCH_REPLACE(node, PPrimitive(prim::kPrimMakeRef, x, y, z), y); + return nullptr; + } +}; + +// {prim::kPrimGetRefValue, Parameter} -> Parameter +// {prim::kPrimGetRefOrigin, Parameter} -> Parameter +class GetRefParamEliminater : public OptimizerCaller { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + PatternNode x; + MATCH_REPLACE_IF(node, PPrimitive(prim::kPrimGetRefValue, x), x, x.CheckFunc(IsParam, node)); + MATCH_REPLACE_IF(node, PPrimitive(prim::kPrimGetRefOrigin, x), x, x.CheckFunc(IsParam, node)); + return nullptr; + } +}; + +// {prim::kPrimGetRefKey, {prim::kPrimMakeRef, X, Y, Z}} -> X +// {prim::kPrimGetRefValue, {prim::kPrimMakeRef, X, Y, Z}} -> Y +// {prim::kPrimGetRefOrigin, {prim::kPrimMakeRef, X, Y, Z}} -> Z +class GetMakeRefEliminater : public OptimizerCaller { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { + PatternNode x, y, z; + MATCH_REPLACE(node, PPrimitive(prim::kPrimGetRefKey, PPrimitive(prim::kPrimMakeRef, x, y, z)), x); + 
MATCH_REPLACE(node, PPrimitive(prim::kPrimGetRefValue, PPrimitive(prim::kPrimMakeRef, x, y, z)), y); + MATCH_REPLACE(node, PPrimitive(prim::kPrimGetRefOrigin, PPrimitive(prim::kPrimMakeRef, x, y, z)), z); + + return nullptr; + } +}; + +// IsValueNode +class ReplaceRefkeyByParam : public OptimizerCaller { + public: + AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override { + auto RefKeyLambda = [&node, &optimizer]() -> AnfNodePtr { + auto refkey = GetValueNode(node); + auto resource = std::dynamic_pointer_cast(optimizer->resource()); + MS_EXCEPTION_IF_NULL(resource); + + auto top_graph = resource->func_graph(); + MS_EXCEPTION_IF_NULL(top_graph); + + for (const auto &tnode : top_graph->parameters()) { + auto para = tnode->cast(); + if (para != nullptr && para->name() == refkey->tag()) { + return para; + } + } + return nullptr; + }; + PatternNode x; + MATCH_REPLACE_LAMBDA_IF(node, x, RefKeyLambda, x.CheckFunc(IsValueNode, node)); + return nullptr; + } +}; +} // namespace irpass +} // namespace opt +} // namespace mindspore +#endif // MINDSPORE_CCSRC_OPTIMIZER_IRPASS_REF_ELIMINATE_H_ diff --git a/mindspore/ccsrc/optimizer/irpass/reshape_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/reshape_eliminate.h similarity index 96% rename from mindspore/ccsrc/optimizer/irpass/reshape_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/reshape_eliminate.h index cafc8b796c4..27d4bdad3dc 100644 --- a/mindspore/ccsrc/optimizer/irpass/reshape_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/reshape_eliminate.h @@ -22,10 +22,10 @@ #include "ir/func_graph.h" #include "ir/optimizer_caller.h" #include "ir/visitor.h" -#include "operator/ops.h" -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" -#include "pipeline/static_analysis/dshape.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" +#include "abstract/dshape.h" namespace mindspore { 
namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/special_op_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/special_op_eliminate.h similarity index 97% rename from mindspore/ccsrc/optimizer/irpass/special_op_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/special_op_eliminate.h index b6a4e1c8523..01efa85e8d0 100644 --- a/mindspore/ccsrc/optimizer/irpass/special_op_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/special_op_eliminate.h @@ -25,10 +25,10 @@ #include "ir/optimizer_caller.h" #include "ir/pattern_matcher.h" #include "ir/visitor.h" -#include "operator/ops.h" -#include "optimizer/irpass.h" -#include "optimizer/irpass/prim_eliminate.h" -#include "optimizer/optimizer.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/irpass/prim_eliminate.h" +#include "frontend/optimizer/optimizer.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/specialize_transform.h b/mindspore/ccsrc/frontend/optimizer/irpass/specialize_transform.h similarity index 98% rename from mindspore/ccsrc/optimizer/irpass/specialize_transform.h rename to mindspore/ccsrc/frontend/optimizer/irpass/specialize_transform.h index 3db9e7bd516..d8a15f6d832 100644 --- a/mindspore/ccsrc/optimizer/irpass/specialize_transform.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/specialize_transform.h @@ -24,13 +24,13 @@ #include #include -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" #include "ir/manager.h" #include "ir/func_graph.h" #include "ir/func_graph_cloner.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/symbol_resolver.h b/mindspore/ccsrc/frontend/optimizer/irpass/symbol_resolver.h similarity index 92% rename from 
mindspore/ccsrc/optimizer/irpass/symbol_resolver.h rename to mindspore/ccsrc/frontend/optimizer/irpass/symbol_resolver.h index 7b35cf54513..de9e533550f 100644 --- a/mindspore/ccsrc/optimizer/irpass/symbol_resolver.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/symbol_resolver.h @@ -20,12 +20,12 @@ #include #include -#include "optimizer/optimizer.h" -#include "optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" #include "ir/visitor.h" -#include "operator/ops.h" -#include "pipeline/parse/data_converter.h" -#include "pipeline/parse/python_adapter.h" +#include "frontend/operator/ops.h" +#include "pipeline/jit/parse/data_converter.h" +#include "pipeline/jit/parse/python_adapter.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/tile_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/tile_eliminate.h similarity index 94% rename from mindspore/ccsrc/optimizer/irpass/tile_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/tile_eliminate.h index 86ac5bab734..f561e04c109 100644 --- a/mindspore/ccsrc/optimizer/irpass/tile_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/tile_eliminate.h @@ -20,10 +20,10 @@ #include #include -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/irpass/transpose_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/transpose_eliminate.h similarity index 94% rename from mindspore/ccsrc/optimizer/irpass/transpose_eliminate.h rename to mindspore/ccsrc/frontend/optimizer/irpass/transpose_eliminate.h index de196ea619f..70b8898462e 100644 --- a/mindspore/ccsrc/optimizer/irpass/transpose_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/transpose_eliminate.h @@ -20,10 
+20,10 @@ #include #include -#include "optimizer/irpass.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer.h" #include "ir/visitor.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/opt.cc b/mindspore/ccsrc/frontend/optimizer/opt.cc similarity index 95% rename from mindspore/ccsrc/optimizer/opt.cc rename to mindspore/ccsrc/frontend/optimizer/opt.cc index 462d08ad3c1..44917106fad 100644 --- a/mindspore/ccsrc/optimizer/opt.cc +++ b/mindspore/ccsrc/frontend/optimizer/opt.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "optimizer/opt.h" +#include "frontend/optimizer/opt.h" #include #include @@ -24,7 +24,7 @@ #include "ir/anf.h" #include "ir/manager.h" -#include "optimizer/optimizer.h" +#include "frontend/optimizer/optimizer.h" #include "utils/log_adapter.h" #include "utils/ordered_set.h" @@ -84,13 +84,8 @@ AnfNodePtr Substitution::operator()(const OptimizerPtr &optimizer, const AnfNode } #endif if (optimizer != nullptr && optimizer->is_watch_renormalize() && result != nullptr) { - if (renorm_action_ == FORCE_RENORM) { - optimizer->add_node_to_renormalize(result); - } else { - // renorm_action_ is CHECK_RENORM - if (result->abstract() == nullptr) { - optimizer->add_node_to_renormalize(result); - } + if ((renorm_action_ == FORCE_RENORM) || (result->abstract() == nullptr)) { + optimizer->set_is_untyped_generated(); } } diff --git a/mindspore/ccsrc/optimizer/opt.h b/mindspore/ccsrc/frontend/optimizer/opt.h similarity index 98% rename from mindspore/ccsrc/optimizer/opt.h rename to mindspore/ccsrc/frontend/optimizer/opt.h index 6601d969d28..f440cc71dca 100644 --- a/mindspore/ccsrc/optimizer/opt.h +++ b/mindspore/ccsrc/frontend/optimizer/opt.h @@ -24,7 +24,7 @@ #include "ir/anf.h" #include "ir/func_graph.h" #include "ir/optimizer_caller.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" 
namespace mindspore { /* namespace to support opt */ diff --git a/mindspore/ccsrc/optimizer/optimizer.h b/mindspore/ccsrc/frontend/optimizer/optimizer.h similarity index 92% rename from mindspore/ccsrc/optimizer/optimizer.h rename to mindspore/ccsrc/frontend/optimizer/optimizer.h index dc423ed3147..a1f11e74d05 100644 --- a/mindspore/ccsrc/optimizer/optimizer.h +++ b/mindspore/ccsrc/frontend/optimizer/optimizer.h @@ -31,9 +31,9 @@ #include "debug/anf_ir_dump.h" #include "debug/anf_ir_utils.h" #include "debug/trace.h" -#include "optimizer/opt.h" -#include "pipeline/resource.h" -#include "pipeline/action.h" +#include "frontend/optimizer/opt.h" +#include "pipeline/jit/resource.h" +#include "pipeline/jit/action.h" #include "utils/context/ms_context.h" namespace mindspore { @@ -89,12 +89,18 @@ using OptPassGroupMap = std::vector>; class Optimizer : public std::enable_shared_from_this { public: Optimizer(const std::string &name, const pipeline::ResourceBasePtr &resource_ptr) - : name_(name), resource_(resource_ptr), run_only_once_(false), is_watch_renormalize_(false), is_enable_(true) {} + : name_(name), + resource_(resource_ptr), + run_only_once_(false), + is_watch_renormalize_(false), + is_enable_(true), + is_untyped_generated_(false) {} virtual ~Optimizer() = default; void Init(const OptPassGroupMap &passes, bool run_only_once) { run_only_once_ = run_only_once; is_watch_renormalize_ = false; + is_untyped_generated_ = false; is_on_debug_ = IS_OUTPUT_ON(mindspore::DEBUG); for (auto &iter : passes) { @@ -154,14 +160,14 @@ class Optimizer : public std::enable_shared_from_this { // So generate the args_spec from parameters. 
abstract::AbstractBasePtrList maybe_new_args_spec; if (is_watch_renormalize_) { - if (untyped_nodes_.size() > 0) { + if (is_untyped_generated_) { std::transform(func_graph->parameters().begin(), func_graph->parameters().end(), std::back_inserter(maybe_new_args_spec), [](AnfNodePtr param) -> AbstractBasePtr { return param->abstract(); }); func_graph = pipeline::Renormalize(resource_ptr, func_graph, maybe_new_args_spec); - clear_untyped_nodes(); + clear_is_untyped_generated(); } else { - MS_LOG(INFO) << "Optimizer::step: Skipping Renormalize because untyped_nodes_ is empty."; + MS_LOG(INFO) << "Optimizer::step: Skipping Renormalize because is_untyped_generated_ is False."; } } else { std::transform(func_graph->parameters().begin(), func_graph->parameters().end(), @@ -206,13 +212,8 @@ class Optimizer : public std::enable_shared_from_this { const std::string name() const { return name_; } - void add_node_to_renormalize(AnfNodePtr anode) { - if (std::find(untyped_nodes_.begin(), untyped_nodes_.end(), anode) == untyped_nodes_.end()) { - untyped_nodes_.push_back(anode); - } - } - - void clear_untyped_nodes() { untyped_nodes_.clear(); } + void set_is_untyped_generated() { is_untyped_generated_ = true; } + void clear_is_untyped_generated() { is_untyped_generated_ = false; } void enable_watch_renormalize() { is_watch_renormalize_ = true; } void disable_watch_renormalize() { is_watch_renormalize_ = false; } @@ -232,9 +233,9 @@ class Optimizer : public std::enable_shared_from_this { std::vector passes_; std::vector pass_names_; bool run_only_once_; - std::vector untyped_nodes_; bool is_watch_renormalize_; bool is_enable_; + bool is_untyped_generated_; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/optimizer/pass_group.cc b/mindspore/ccsrc/frontend/optimizer/pass_group.cc similarity index 97% rename from mindspore/ccsrc/optimizer/pass_group.cc rename to mindspore/ccsrc/frontend/optimizer/pass_group.cc index 2d1ab07f7d0..3619396215b 100644 --- 
a/mindspore/ccsrc/optimizer/pass_group.cc +++ b/mindspore/ccsrc/frontend/optimizer/pass_group.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "optimizer/pass_group.h" +#include "frontend/optimizer/pass_group.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/pass_group.h b/mindspore/ccsrc/frontend/optimizer/pass_group.h similarity index 98% rename from mindspore/ccsrc/optimizer/pass_group.h rename to mindspore/ccsrc/frontend/optimizer/pass_group.h index 895f5a41289..08fa8018d68 100644 --- a/mindspore/ccsrc/optimizer/pass_group.h +++ b/mindspore/ccsrc/frontend/optimizer/pass_group.h @@ -21,7 +21,7 @@ #include #include -#include "optimizer/py_pass.h" +#include "frontend/optimizer/py_pass.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/py_pass.cc b/mindspore/ccsrc/frontend/optimizer/py_pass.cc similarity index 97% rename from mindspore/ccsrc/optimizer/py_pass.cc rename to mindspore/ccsrc/frontend/optimizer/py_pass.cc index 8ce348b22e4..c1bf40fcbb3 100644 --- a/mindspore/ccsrc/optimizer/py_pass.cc +++ b/mindspore/ccsrc/frontend/optimizer/py_pass.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "optimizer/py_pass.h" +#include "frontend/optimizer/py_pass.h" #include #include #include @@ -22,8 +22,8 @@ #include "ir/func_graph.h" #include "ir/manager.h" -#include "pipeline/parse/parse_base.h" -#include "pipeline/resource.h" +#include "pipeline/jit/parse/parse_base.h" +#include "pipeline/jit/resource.h" namespace mindspore { namespace opt { @@ -54,6 +54,7 @@ void ResolveFuncGraph_(const FuncGraphPtr &fg) { auto manager = Manage(fg, false); parse::python_adapter::set_use_signature_in_resolve(false); parse::ResolveAll(manager); + parse::python_adapter::set_use_signature_in_resolve(true); } bool Match(const AnfNodePtr &pattern, const AnfNodePtr &node, const NodeEquivPtr &equiv_ptr) { diff --git a/mindspore/ccsrc/optimizer/py_pass.h b/mindspore/ccsrc/frontend/optimizer/py_pass.h similarity index 100% rename from mindspore/ccsrc/optimizer/py_pass.h rename to mindspore/ccsrc/frontend/optimizer/py_pass.h diff --git a/mindspore/ccsrc/optimizer/py_pass_manager.cc b/mindspore/ccsrc/frontend/optimizer/py_pass_manager.cc similarity index 96% rename from mindspore/ccsrc/optimizer/py_pass_manager.cc rename to mindspore/ccsrc/frontend/optimizer/py_pass_manager.cc index 1c36e93c9af..86d7067d1ca 100644 --- a/mindspore/ccsrc/optimizer/py_pass_manager.cc +++ b/mindspore/ccsrc/frontend/optimizer/py_pass_manager.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "optimizer/py_pass_manager.h" +#include "frontend/optimizer/py_pass_manager.h" #include #include @@ -21,7 +21,7 @@ #include #include "ir/manager.h" -#include "optimizer/pass_group.h" +#include "frontend/optimizer/pass_group.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/optimizer/py_pass_manager.h b/mindspore/ccsrc/frontend/optimizer/py_pass_manager.h similarity index 92% rename from mindspore/ccsrc/optimizer/py_pass_manager.h rename to mindspore/ccsrc/frontend/optimizer/py_pass_manager.h index eaeefce2139..84868862a7f 100644 --- a/mindspore/ccsrc/optimizer/py_pass_manager.h +++ b/mindspore/ccsrc/frontend/optimizer/py_pass_manager.h @@ -23,13 +23,13 @@ #include "ir/anf.h" #include "ir/func_graph.h" -#include "ir/primitive.h" +#include "ir/primitive_py.h" #include "utils/graph_utils.h" #include "common/utils.h" -#include "pipeline/parse/resolve.h" -#include "optimizer/py_pass.h" -#include "optimizer/pass_group.h" +#include "pipeline/jit/parse/resolve.h" +#include "frontend/optimizer/py_pass.h" +#include "frontend/optimizer/pass_group.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/parallel/CMakeLists.txt b/mindspore/ccsrc/frontend/parallel/CMakeLists.txt similarity index 63% rename from mindspore/ccsrc/parallel/CMakeLists.txt rename to mindspore/ccsrc/frontend/parallel/CMakeLists.txt index 940b1ed1d85..d2a099cf415 100644 --- a/mindspore/ccsrc/parallel/CMakeLists.txt +++ b/mindspore/ccsrc/frontend/parallel/CMakeLists.txt @@ -1,7 +1,8 @@ file(GLOB_RECURSE _PARALLEL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") +list(REMOVE_ITEM _PARALLEL_SRC_FILES "ps/util.cc" "ps/scheduler.cc" "ps/optimizer_info.cc" "ps/optimizer_info_builder.cc") if (ENABLE_DUMP_PROTO) list(REMOVE_ITEM _PARALLEL_SRC_FILES "parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc") endif () set_property(SOURCE ${_PARALLEL_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PARALLEL) 
-add_library(_mindspore_parallel_obj OBJECT ${_PARALLEL_SRC_FILES}) +add_library(_mindspore_frontend_parallel_obj OBJECT ${_PARALLEL_SRC_FILES}) diff --git a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_fusion.cc b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_fusion.cc similarity index 98% rename from mindspore/ccsrc/parallel/allreduce_fusion/allreduce_fusion.cc rename to mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_fusion.cc index 30173e533cc..70ae5a7d207 100644 --- a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_fusion.cc +++ b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_fusion.cc @@ -14,16 +14,16 @@ * limitations under the License. */ -#include "parallel/allreduce_fusion/allreduce_fusion.h" +#include "frontend/parallel/allreduce_fusion/allreduce_fusion.h" #include #include #include #include #include "ir/func_graph.h" -#include "parallel/costmodel_context.h" -#include "parallel/graph_util/node_info.h" -#include "parallel/status.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/costmodel_context.h" +#include "frontend/parallel/graph_util/node_info.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/step_parallel.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_fusion.h b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_fusion.h similarity index 96% rename from mindspore/ccsrc/parallel/allreduce_fusion/allreduce_fusion.h rename to mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_fusion.h index 43a99350954..7383c477a6f 100644 --- a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_fusion.h +++ b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_fusion.h @@ -20,8 +20,8 @@ #include #include #include "ir/anf.h" -#include "parallel/allreduce_fusion/allreduce_graph.h" -#include "parallel/status.h" +#include "frontend/parallel/allreduce_fusion/allreduce_graph.h" 
+#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_graph.cc b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_graph.cc similarity index 98% rename from mindspore/ccsrc/parallel/allreduce_fusion/allreduce_graph.cc rename to mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_graph.cc index 2a98a38add3..ca47b0fa978 100644 --- a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_graph.cc +++ b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_graph.cc @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "parallel/allreduce_fusion/allreduce_graph.h" +#include "frontend/parallel/allreduce_fusion/allreduce_graph.h" #include #include #include "ir/anf.h" -#include "parallel/allreduce_fusion/allreduce_node.h" +#include "frontend/parallel/allreduce_fusion/allreduce_node.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_graph.h b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_graph.h similarity index 97% rename from mindspore/ccsrc/parallel/allreduce_fusion/allreduce_graph.h rename to mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_graph.h index b2084b735cb..a47039f070f 100644 --- a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_graph.h +++ b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_graph.h @@ -24,8 +24,8 @@ #include #include #include "ir/anf.h" -#include "parallel/allreduce_fusion/allreduce_node.h" -#include "parallel/status.h" +#include "frontend/parallel/allreduce_fusion/allreduce_node.h" +#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_node.cc b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_node.cc similarity index 96% rename from mindspore/ccsrc/parallel/allreduce_fusion/allreduce_node.cc rename to 
mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_node.cc index 113d4ec59b6..1c478887dff 100644 --- a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_node.cc +++ b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_node.cc @@ -14,9 +14,9 @@ * limitations under the License. */ -#include "parallel/allreduce_fusion/allreduce_node.h" +#include "frontend/parallel/allreduce_fusion/allreduce_node.h" #include -#include "parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_node.h b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_node.h similarity index 98% rename from mindspore/ccsrc/parallel/allreduce_fusion/allreduce_node.h rename to mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_node.h index db1c4e3f2ef..6538381f274 100644 --- a/mindspore/ccsrc/parallel/allreduce_fusion/allreduce_node.h +++ b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_node.h @@ -22,7 +22,7 @@ #include #include #include "ir/anf.h" -#include "parallel/status.h" +#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/step_allreduce_fusion.cc similarity index 90% rename from mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc rename to mindspore/ccsrc/frontend/parallel/allreduce_fusion/step_allreduce_fusion.cc index 999c4a85a90..b669fa7782c 100644 --- a/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc +++ b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/step_allreduce_fusion.cc @@ -14,14 +14,14 @@ * limitations under the License. 
*/ -#include "parallel/allreduce_fusion/step_allreduce_fusion.h" +#include "frontend/parallel/allreduce_fusion/step_allreduce_fusion.h" #include #include -#include "optimizer/optimizer.h" -#include "parallel/allreduce_fusion/allreduce_fusion.h" -#include "parallel/context.h" -#include "parallel/graph_util/graph_info.h" -#include "parallel/status.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/parallel/allreduce_fusion/allreduce_fusion.h" +#include "frontend/parallel/context.h" +#include "frontend/parallel/graph_util/graph_info.h" +#include "frontend/parallel/status.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.h b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/step_allreduce_fusion.h similarity index 96% rename from mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.h rename to mindspore/ccsrc/frontend/parallel/allreduce_fusion/step_allreduce_fusion.h index 2343a7a2fef..2612e71984f 100644 --- a/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.h +++ b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/step_allreduce_fusion.h @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_PARALLEL_ALLREDUCE_FUSION_STEP_ALLREDUCE_FUSION_H_ #define MINDSPORE_CCSRC_PARALLEL_ALLREDUCE_FUSION_STEP_ALLREDUCE_FUSION_H_ -#include "optimizer/optimizer.h" +#include "frontend/optimizer/optimizer.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/costmodel.cc b/mindspore/ccsrc/frontend/parallel/auto_parallel/costmodel.cc similarity index 97% rename from mindspore/ccsrc/parallel/auto_parallel/costmodel.cc rename to mindspore/ccsrc/frontend/parallel/auto_parallel/costmodel.cc index 65e9acf7147..531a5cd7f64 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/costmodel.cc +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/costmodel.cc @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#include "parallel/auto_parallel/costmodel.h" +#include "frontend/parallel/auto_parallel/costmodel.h" #include #include #include -#include "parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/costmodel.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/costmodel.h similarity index 99% rename from mindspore/ccsrc/parallel/auto_parallel/costmodel.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/costmodel.h index 8b92e18cd85..cc4508681bf 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/costmodel.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/costmodel.h @@ -22,8 +22,8 @@ #include #include #include -#include "parallel/strategy.h" -#include "parallel/tensor_layout/tensor_info.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/tensor_layout/tensor_info.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/dp_algo_costmodel.cc b/mindspore/ccsrc/frontend/parallel/auto_parallel/dp_algo_costmodel.cc similarity index 99% rename from mindspore/ccsrc/parallel/auto_parallel/dp_algo_costmodel.cc rename to mindspore/ccsrc/frontend/parallel/auto_parallel/dp_algo_costmodel.cc index 72451fab57c..9408596111d 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/dp_algo_costmodel.cc +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/dp_algo_costmodel.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/auto_parallel/dp_algo_costmodel.h" +#include "frontend/parallel/auto_parallel/dp_algo_costmodel.h" #include #include diff --git a/mindspore/ccsrc/parallel/auto_parallel/dp_algo_costmodel.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/dp_algo_costmodel.h similarity index 98% rename from mindspore/ccsrc/parallel/auto_parallel/dp_algo_costmodel.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/dp_algo_costmodel.h index e3fbfba5a77..812f375f0bd 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/dp_algo_costmodel.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/dp_algo_costmodel.h @@ -21,8 +21,8 @@ #include #include #include "ir/value.h" -#include "parallel/auto_parallel/edge_costmodel.h" -#include "parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/auto_parallel/edge_costmodel.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.cc b/mindspore/ccsrc/frontend/parallel/auto_parallel/edge_costmodel.cc similarity index 98% rename from mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.cc rename to mindspore/ccsrc/frontend/parallel/auto_parallel/edge_costmodel.cc index 60256a3ae32..e3f1de72077 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.cc +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/edge_costmodel.cc @@ -14,15 +14,15 @@ * limitations under the License. 
*/ -#include "parallel/auto_parallel/edge_costmodel.h" +#include "frontend/parallel/auto_parallel/edge_costmodel.h" #include #include #include #include -#include "parallel/auto_parallel/costmodel.h" -#include "parallel/auto_parallel/graph_costmodel.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/auto_parallel/costmodel.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/edge_costmodel.h similarity index 97% rename from mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/edge_costmodel.h index 2a5ed3b2a40..3fffd1b86d2 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/edge_costmodel.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/edge_costmodel.h @@ -23,10 +23,10 @@ #include #include #include "common/utils.h" -#include "parallel/auto_parallel/costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/tensor_layout/tensor_info.h" -#include "parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/auto_parallel/costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/tensor_layout/tensor_info.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.cc b/mindspore/ccsrc/frontend/parallel/auto_parallel/graph_costmodel.cc similarity index 99% rename from mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.cc rename to mindspore/ccsrc/frontend/parallel/auto_parallel/graph_costmodel.cc index 05be097e6a8..1c1fc3a700b 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.cc +++ 
b/mindspore/ccsrc/frontend/parallel/auto_parallel/graph_costmodel.cc @@ -21,9 +21,9 @@ #include #include -#include "parallel/auto_parallel/graph_costmodel.h" -#include "parallel/ops_info/reshape_info.h" -#include "parallel/step_auto_parallel.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/ops_info/reshape_info.h" +#include "frontend/parallel/step_auto_parallel.h" namespace mindspore { namespace parallel { @@ -41,7 +41,6 @@ bool FULLY_USE_DEVICES = DEFAULT_FULLY_USE_DEVICES; bool ELEMENTWISE_OP_STRA_FOLLOW = DEFAULT_ELEMENTWISE_OP_STRA_FOLLOW; bool MULTI_SUBGRAPHS = DEFAULT_IS_MULTI_SUBGRAPHS; int32_t RUN_PHASE = DEFAULT_RUN_PHASE; -constexpr char RESHAPEINFO[] = "ReshapeInfo"; void CostGraph::SetDeviceMemoryAndCostParameter() { MS_EXCEPTION_IF_NULL(CostModelContext::GetInstance()); diff --git a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/graph_costmodel.h similarity index 97% rename from mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/graph_costmodel.h index 3b8b389d813..87f13e3383d 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/graph_costmodel.h @@ -22,12 +22,12 @@ #include #include #include -#include "../../common.h" +#include "mindspore/ccsrc/common.h" #include "common/utils.h" -#include "parallel/auto_parallel/edge_costmodel.h" -#include "parallel/costmodel_context.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/ops_info/tmp_identity_info.h" +#include "frontend/parallel/auto_parallel/edge_costmodel.h" +#include "frontend/parallel/costmodel_context.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/ops_info/tmp_identity_info.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.cc 
b/mindspore/ccsrc/frontend/parallel/auto_parallel/operator_costmodel.cc similarity index 99% rename from mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.cc rename to mindspore/ccsrc/frontend/parallel/auto_parallel/operator_costmodel.cc index 8ebfdb7d130..aaf3fdff3c4 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.cc +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/operator_costmodel.cc @@ -14,12 +14,12 @@ * limitations under the License. */ -#include "parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" #include #include -#include "parallel/device_matrix.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/operator_costmodel.h similarity index 99% rename from mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/operator_costmodel.h index a08a4dbb131..dda597bd1f3 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/operator_costmodel.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/operator_costmodel.h @@ -19,8 +19,8 @@ #include #include -#include "parallel/device_manager.h" -#include "parallel/tensor_layout/tensor_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/tensor_layout/tensor_info.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.cc b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_cost.cc similarity index 99% rename from mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.cc rename to mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_cost.cc index 9fb79ceee42..0a7e6c59d4d 100644 --- 
a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.cc +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_cost.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "parallel/auto_parallel/rec_core/rec_cost.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_cost.h" #include #include diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_cost.h similarity index 98% rename from mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_cost.h index fb4fc27164c..563bf4598ae 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_cost.h @@ -23,8 +23,8 @@ #include #include -#include "parallel/auto_parallel/rec_core/rec_graph.h" -#include "parallel/auto_parallel/rec_core/rec_strategy.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_graph.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.cc similarity index 99% rename from mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc rename to mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.cc index 9de71231c0b..68b776155ac 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.cc @@ -14,17 +14,17 @@ * limitations under the License. 
*/ -#include "parallel/auto_parallel/rec_core/rec_generate_strategy.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.h" #include #include #include #include "ir/value.h" -#include "parallel/auto_parallel/rec_core/rec_parse_graph.h" -#include "parallel/auto_parallel/rec_core/rec_partition.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_parse_graph.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_partition.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { @@ -168,12 +168,11 @@ std::vector> PrepareGatherV2(const std::vector s) { std::vector> strategies; - int32_t axis = 0; auto axis_input = GetValue(ops[iter_ops]->input_value().at(2)); if (axis_input < 0) { axis_input += SizeToInt(ops[iter_ops]->inputs_tensor_info()[0].shape().size()); } - axis = axis_input; + int32_t axis = axis_input; if (axis >= SizeToInt(s.size())) { MS_LOG(EXCEPTION) << "Failure: GatherV2' axis out of range."; } diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.h similarity index 98% rename from mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.h index e82efe67988..9acd05e0a98 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.h @@ -22,8 +22,8 @@ #include #include -#include "parallel/auto_parallel/rec_core/rec_graph.h" -#include "parallel/ops_info/operator_info.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_graph.h" +#include "frontend/parallel/ops_info/operator_info.h" namespace mindspore { namespace parallel { diff --git 
a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_graph.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_graph.h similarity index 94% rename from mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_graph.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_graph.h index 9007218d152..15b82200162 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_graph.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_graph.h @@ -21,8 +21,8 @@ #include #include -#include "parallel/auto_parallel/rec_core/rec_strategy.h" -#include "parallel/auto_parallel/rec_core/rec_tensor.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_strategy.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_tensor.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.cc b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_parse_graph.cc similarity index 91% rename from mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.cc rename to mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_parse_graph.cc index c0412e9108a..a393c825dfe 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.cc +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_parse_graph.cc @@ -14,18 +14,17 @@ * limitations under the License. 
*/ -#include "parallel/auto_parallel/rec_core/rec_parse_graph.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_parse_graph.h" #include #include #include #include -#include #include "ir/value.h" -#include "parallel/auto_parallel/rec_core/rec_graph.h" -#include "parallel/auto_parallel/rec_core/rec_tensor.h" -#include "parallel/ops_info/operator_info.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_graph.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_tensor.h" +#include "frontend/parallel/ops_info/operator_info.h" namespace mindspore { namespace parallel { @@ -215,23 +214,16 @@ std::shared_ptr EliminateGraph(const std::shared_ptr &graph, const std::shared_ptr>> &eli_list, const std::shared_ptr> &index_list) { MS_EXCEPTION_IF_NULL(graph); - static const std::set elementwise_type = { - OperatorType::kRecReLU, OperatorType::kRecLog, OperatorType::kRecExp, OperatorType::kRecAdd, - OperatorType::kRecElmWiseOp, OperatorType::kRecBiasAdd, OperatorType::kRecSub, OperatorType::kRecMul, - OperatorType::kRecDiv, OperatorType::kRecSqueeze, OperatorType::kRecReduce, OperatorType::kRecCast, - OperatorType::kRecReshape, OperatorType::kRecGatherV2, OperatorType::kRecArgWithValue}; for (size_t node_index = 0; node_index < (size_t)graph->nodes.size(); node_index++) { auto type = graph->nodes[node_index].apply.op_type; - if (elementwise_type.find(type) != elementwise_type.end()) { + if (ElementWiseOpType.find(type) != ElementWiseOpType.end()) { Eliminate_Aux(node_index, graph, eli_list); } } - index_list->reserve(graph->nodes.size()); for (size_t i = 0; i < (size_t)graph->nodes.size(); i++) { index_list->push_back(i); } - for (size_t i = 0; i < (size_t)eli_list->size(); i++) { if (eli_list->at(i)[0] >= index_list->size()) { MS_LOG(EXCEPTION) << "Failure: Operators' elements out of range."; @@ -241,13 +233,11 @@ std::shared_ptr EliminateGraph(const std::shared_ptr &graph, index_list->at(j)--; } } - std::shared_ptr new_graph(new Graph); for (size_t i = 0; i 
< graph->nodes.size(); i++) { if (index_list->at(i) > SIZE_MAX / 2) { continue; } - new_graph->nodes.push_back(graph->nodes[i]); auto *node_in = &new_graph->nodes[index_list->at(i)].node_in; for (size_t j = node_in->size(); j > 0; j--) { diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_parse_graph.h similarity index 90% rename from mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_parse_graph.h index 66fc82b8cec..4d0c02f5fe2 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_parse_graph.h @@ -22,12 +22,19 @@ #include #include #include +#include -#include "parallel/auto_parallel/rec_core/rec_graph.h" -#include "parallel/ops_info/operator_info.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_graph.h" +#include "frontend/parallel/ops_info/operator_info.h" namespace mindspore { namespace parallel { +static const std::set ElementWiseOpType = { + OperatorType::kRecReLU, OperatorType::kRecLog, OperatorType::kRecExp, OperatorType::kRecAdd, + OperatorType::kRecElmWiseOp, OperatorType::kRecBiasAdd, OperatorType::kRecSub, OperatorType::kRecMul, + OperatorType::kRecDiv, OperatorType::kRecSqueeze, OperatorType::kRecReduce, OperatorType::kRecCast, + OperatorType::kRecReshape, OperatorType::kRecGatherV2, OperatorType::kRecArgWithValue}; + const std::map DictOpType{ {MATMUL, OperatorType::kRecMatMul}, {CONV2D, OperatorType::kRecConvolution}, diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_partition.cc similarity index 99% rename from mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc rename to mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_partition.cc index d5200f54d8c..97d230a49f7 
100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_partition.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "parallel/auto_parallel/rec_core/rec_partition.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_partition.h" #include #include @@ -25,7 +25,7 @@ #include #include "ir/anf.h" -#include "parallel/status.h" +#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_partition.h similarity index 87% rename from mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_partition.h index c98f3317f85..528163e4d3f 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_partition.h @@ -25,10 +25,10 @@ #include #include -#include "parallel/auto_parallel/rec_core/rec_cost.h" -#include "parallel/auto_parallel/rec_core/rec_graph.h" -#include "parallel/auto_parallel/rec_core/rec_strategy.h" -#include "parallel/status.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_cost.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_graph.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_strategy.h" +#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_strategy.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_strategy.h similarity index 100% rename from mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_strategy.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_strategy.h diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_tensor.h 
b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_tensor.h similarity index 94% rename from mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_tensor.h rename to mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_tensor.h index 51ffca40230..315c52c867b 100644 --- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_tensor.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_tensor.h @@ -17,7 +17,7 @@ #ifndef PARALLEL_AUTO_PARALLEL_REC_TENSOR_H_ #define PARALLEL_AUTO_PARALLEL_REC_TENSOR_H_ -#include "parallel/auto_parallel/rec_core/rec_strategy.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/context.cc b/mindspore/ccsrc/frontend/parallel/context.cc similarity index 98% rename from mindspore/ccsrc/parallel/context.cc rename to mindspore/ccsrc/frontend/parallel/context.cc index 062d814aa04..7164660be04 100644 --- a/mindspore/ccsrc/parallel/context.cc +++ b/mindspore/ccsrc/frontend/parallel/context.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/context.h" +#include "frontend/parallel/context.h" #include #include @@ -25,7 +25,7 @@ #include #include "common/utils.h" -#include "parallel/device_manager.h" +#include "frontend/parallel/device_manager.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/context.h b/mindspore/ccsrc/frontend/parallel/context.h similarity index 97% rename from mindspore/ccsrc/parallel/context.h rename to mindspore/ccsrc/frontend/parallel/context.h index 6a503ca7eda..1bb40d5c290 100644 --- a/mindspore/ccsrc/parallel/context.h +++ b/mindspore/ccsrc/frontend/parallel/context.h @@ -23,13 +23,13 @@ #include #include -#include "parallel/ops_info/ops_utils.h" -#include "parallel/status.h" +#include "frontend/parallel/ops_info/ops_utils.h" +#include "frontend/parallel/status.h" #include "utils/convert_utils.h" #include "ir/anf.h" #include "ir/func_graph.h" #include "debug/info.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/costmodel_context.cc b/mindspore/ccsrc/frontend/parallel/costmodel_context.cc similarity index 96% rename from mindspore/ccsrc/parallel/costmodel_context.cc rename to mindspore/ccsrc/frontend/parallel/costmodel_context.cc index 92aff295575..67d087eabdb 100644 --- a/mindspore/ccsrc/parallel/costmodel_context.cc +++ b/mindspore/ccsrc/frontend/parallel/costmodel_context.cc @@ -14,12 +14,12 @@ * limitations under the License. 
*/ -#include "parallel/costmodel_context.h" +#include "frontend/parallel/costmodel_context.h" #include -#include "parallel/allreduce_fusion/allreduce_fusion.h" -#include "parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/allreduce_fusion/allreduce_fusion.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/costmodel_context.h b/mindspore/ccsrc/frontend/parallel/costmodel_context.h similarity index 100% rename from mindspore/ccsrc/parallel/costmodel_context.h rename to mindspore/ccsrc/frontend/parallel/costmodel_context.h diff --git a/mindspore/ccsrc/parallel/device.h b/mindspore/ccsrc/frontend/parallel/device.h similarity index 97% rename from mindspore/ccsrc/parallel/device.h rename to mindspore/ccsrc/frontend/parallel/device.h index 8c3174ae557..c9633623d26 100644 --- a/mindspore/ccsrc/parallel/device.h +++ b/mindspore/ccsrc/frontend/parallel/device.h @@ -21,7 +21,7 @@ #include #include -#include "parallel/status.h" +#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/device_manager.cc b/mindspore/ccsrc/frontend/parallel/device_manager.cc similarity index 99% rename from mindspore/ccsrc/parallel/device_manager.cc rename to mindspore/ccsrc/frontend/parallel/device_manager.cc index 45628bec650..d3657afdb82 100644 --- a/mindspore/ccsrc/parallel/device_manager.cc +++ b/mindspore/ccsrc/frontend/parallel/device_manager.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/device_manager.h" +#include "frontend/parallel/device_manager.h" #include #include @@ -23,7 +23,7 @@ #include #include -#include "parallel/step_parallel.h" +#include "frontend/parallel/step_parallel.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/device_manager.h b/mindspore/ccsrc/frontend/parallel/device_manager.h similarity index 95% rename from mindspore/ccsrc/parallel/device_manager.h rename to mindspore/ccsrc/frontend/parallel/device_manager.h index 3afafe6a9c2..654acd9dffc 100644 --- a/mindspore/ccsrc/parallel/device_manager.h +++ b/mindspore/ccsrc/frontend/parallel/device_manager.h @@ -26,11 +26,11 @@ #include #include "common/utils.h" -#include "parallel/device.h" -#include "parallel/device_matrix.h" -#include "parallel/group_manager.h" -#include "parallel/status.h" -#include "parallel/strategy.h" +#include "frontend/parallel/device.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/group_manager.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/strategy.h" #include "utils/convert_utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/device_matrix.cc b/mindspore/ccsrc/frontend/parallel/device_matrix.cc similarity index 97% rename from mindspore/ccsrc/parallel/device_matrix.cc rename to mindspore/ccsrc/frontend/parallel/device_matrix.cc index 3c9467a2239..9cc85d97013 100644 --- a/mindspore/ccsrc/parallel/device_matrix.cc +++ b/mindspore/ccsrc/frontend/parallel/device_matrix.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/device_matrix.h" +#include "frontend/parallel/device_matrix.h" #include #include @@ -23,8 +23,8 @@ #include #include -#include "parallel/ops_info/operator_info.h" -#include "parallel/status.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/status.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/device_matrix.h b/mindspore/ccsrc/frontend/parallel/device_matrix.h similarity index 97% rename from mindspore/ccsrc/parallel/device_matrix.h rename to mindspore/ccsrc/frontend/parallel/device_matrix.h index 295bf338361..f1e7acec395 100644 --- a/mindspore/ccsrc/parallel/device_matrix.h +++ b/mindspore/ccsrc/frontend/parallel/device_matrix.h @@ -21,7 +21,7 @@ #include #include -#include "parallel/status.h" +#include "frontend/parallel/status.h" #include "utils/convert_utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/dynamic_creator.h b/mindspore/ccsrc/frontend/parallel/dynamic_creator.h similarity index 96% rename from mindspore/ccsrc/parallel/dynamic_creator.h rename to mindspore/ccsrc/frontend/parallel/dynamic_creator.h index f8e1d62d0ab..3ba40fade97 100644 --- a/mindspore/ccsrc/parallel/dynamic_creator.h +++ b/mindspore/ccsrc/frontend/parallel/dynamic_creator.h @@ -22,8 +22,8 @@ #include #include -#include "parallel/ops_info/ops_info_head_files.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/ops_info/ops_info_head_files.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { @@ -132,6 +132,7 @@ REGISTER(SqueezeInfo); REGISTER(SigmoidCrossEntropyWithLogitsInfo); REGISTER(SquareInfo); REGISTER(GatherV2PInfo); +REGISTER(EmbeddingLookupInfo); } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.cc b/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.cc new file mode 100644 index 00000000000..30c25e5f26b --- /dev/null +++ 
b/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.cc @@ -0,0 +1,175 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "frontend/parallel/graph_util/generate_graph.h" + +#include +#include +#include +#include + +using mindspore::tensor::Tensor; + +namespace mindspore { +namespace parallel { +std::string GetOpPythonPath(const OperatorName &op_name) { + // almost all ops are defined in two main paths + const std::string ops_module = OP_PATH; + const std::string inner_ops_module = INNER_OP_PATH; + py::module mod = py::module::import(common::SafeCStr(ops_module)); + py::module inner_mod = py::module::import(common::SafeCStr(inner_ops_module)); + if (!py::hasattr(mod, common::SafeCStr(op_name))) { + if (!py::hasattr(inner_mod, common::SafeCStr(op_name))) { + MS_LOG(EXCEPTION) << ops_module << " or " << inner_ops_module << " don't have op:" << op_name; + } + return inner_ops_module; + } + return ops_module; +} + +ValuePtr CreatOpInstance(const OperatorAttrs &attrs, const OperatorName &op_name, const std::string &instance_name) { + std::string op_path = GetOpPythonPath(op_name); + py::module mod = py::module::import(common::SafeCStr(op_path)); + if (!py::hasattr(mod, common::SafeCStr(op_name))) { + MS_LOG(ERROR) << "Failure: op_path:" << op_path << " don't have attr " << op_name; + return nullptr; + } + std::vector arg_list; + (void)std::transform(attrs.begin(), attrs.end(), 
std::back_inserter(arg_list), + [](const Attr &attr) { return ValuePtrToPyData(attr.second); }); + py::object obj = + parse::python_adapter::CallPyFn(GET_OP_FUNCTION_PATH, GET_OP_FUNCTION, op_name, op_path, instance_name, arg_list); + ValuePtr op_instance = nullptr; + bool succ = parse::ConvertData(obj, &op_instance); + if (!succ) { + MS_LOG(ERROR) << "Failure:get Python op " << op_path << " from " << op_name << " fail"; + return nullptr; + } + return op_instance; +} + +AnfNodePtr ValuePtrToAnfNodePtr(const ValuePtr &value_ptr) { + auto value_node = NewValueNode(value_ptr); + MS_EXCEPTION_IF_NULL(value_node); + return value_node->cast(); +} + +static std::unordered_map int_tensor_map = {}; +AnfNodePtr CreateInt32Tensor(int32_t value) { + auto it = int_tensor_map.find(value); + if (it != int_tensor_map.end()) { + return it->second; + } + mindspore::tensor::TensorPtr tensor_ptr = std::make_shared(py::int_(value), kInt32); + ValuePtr value_ptr = MakeValue(tensor_ptr); + auto anf_node_ptr = ValuePtrToAnfNodePtr(value_ptr); + int_tensor_map[value] = anf_node_ptr; + return anf_node_ptr; +} + +AnfNodePtr CreatTypeInt(int32_t value) { + ValuePtr value_ptr = MakeValue(std::make_shared(value)); + return ValuePtrToAnfNodePtr(value_ptr); +} + +AnfNodePtr CreatInt32Imm(int32_t value) { + ValuePtr value_ptr = MakeValue(std::make_shared(value)); + return ValuePtrToAnfNodePtr(value_ptr); +} + +std::string GetInstanceNameByCNode(const CNodePtr &cnode) { + PrimitivePtr prim = GetValueNode(cnode->input(0)); + if (!prim) { + MS_LOG(EXCEPTION) << "The first input of the cnode is not a PrimitivePtr."; + } + std::string instance_name = prim->instance_name(); + return HashInstanceName(instance_name); +} + +std::string HashInstanceName(const std::string &name) { + auto using_hash_name = common::GetEnv(USING_HASH_NAME); + std::string instance_name; + if ((using_hash_name.empty()) || (using_hash_name == "on")) { + instance_name = HashName(name); + } else { + instance_name = name; + } + 
return instance_name; +} + +Status GenerateGraph::Init(const CNodePtr &cnode) { + if (!cnode) { + MS_LOG(ERROR) << "Init:cnode is nullptr"; + return FAILED; + } + cnode_ = cnode; + func_graph_ = cnode->func_graph(); + if (!func_graph_) { + MS_LOG(ERROR) << "Init:func_graph_ is nullptr"; + return FAILED; + } + manager_ = func_graph_->manager(); + if (!manager_) { + MS_LOG(ERROR) << "Init:manager_ is nullptr"; + return FAILED; + } + scope_ = cnode_->scope(); + if (!scope_) { + MS_LOG(ERROR) << "Init:scope_ is nullptr"; + return FAILED; + } + virtual_input_node_ = std::make_shared(nullptr); + virtual_input_node_->set_scope(scope_); + instance_name_base_ = GetInstanceNameByCNode(cnode_); + name_idx_ = 0; + return SUCCESS; +} + +AnfNodePtr GenerateGraph::PushBack(const std::vector &inputs) { + CNodePtr cnode = func_graph_->NewCNode(inputs); // using NewCNode to creat anfnode + MS_EXCEPTION_IF_NULL(cnode); + cnode->set_scope(scope_); + if (inputs.size() < 2) { + MS_LOG(EXCEPTION) << "inputs.size() must be more than 1"; + } + (void)manager_->Replace(inputs.at(1), cnode); // using Replace function to insert cnode after inputs[0] + auto new_anf_node_ptr = cnode->cast(); + MS_EXCEPTION_IF_NULL(new_anf_node_ptr); + return new_anf_node_ptr; +} + +AnfNodePtr GenerateGraph::NewOpInst(const OperatorName &op_name, const OperatorAttrs &attrs) { + name_idx_++; + ValuePtr pyop_instance = CreatOpInstance(attrs, op_name, instance_name_base_ + op_name + std::to_string(name_idx_)); + if (pyop_instance == nullptr) { + MS_LOG(EXCEPTION) << "Failure:" << op_name << " CreatOpInstance failed"; + } + auto value_node = NewValueNode(pyop_instance); + return value_node->cast(); +} + +AnfNodePtr GenerateGraph::NewOpInst(const OperatorName &op_name) { + name_idx_++; + OperatorAttrs attrs; + ValuePtr pyop_instance = CreatOpInstance(attrs, op_name, instance_name_base_ + std::to_string(name_idx_)); + if (pyop_instance == nullptr) { + MS_LOG(EXCEPTION) << "Failure:" << op_name << " CreatOpInstance 
failed"; + } + auto value_node = NewValueNode(pyop_instance); + return value_node->cast(); +} +} // namespace parallel +} // namespace mindspore diff --git a/mindspore/ccsrc/parallel/graph_util/generate_graph.h b/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.h similarity index 93% rename from mindspore/ccsrc/parallel/graph_util/generate_graph.h rename to mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.h index 71227a6e7b6..b3ef54a22ec 100644 --- a/mindspore/ccsrc/parallel/graph_util/generate_graph.h +++ b/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.h @@ -25,9 +25,9 @@ #include #include "./common.h" -#include "optimizer/opt.h" -#include "parallel/strategy.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/optimizer/opt.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/graph_util/get_parallel_info.cc b/mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc similarity index 92% rename from mindspore/ccsrc/parallel/graph_util/get_parallel_info.cc rename to mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc index 32cd106d8ea..21298697f44 100644 --- a/mindspore/ccsrc/parallel/graph_util/get_parallel_info.cc +++ b/mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/graph_util/get_parallel_info.h" +#include "frontend/parallel/graph_util/get_parallel_info.h" #include #include @@ -23,10 +23,10 @@ #include "common/utils.h" #include "ir/func_graph.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/graph_util/graph_info.h" -#include "parallel/strategy.h" -#include "parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/graph_util/graph_info.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/graph_util/get_parallel_info.h b/mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.h similarity index 100% rename from mindspore/ccsrc/parallel/graph_util/get_parallel_info.h rename to mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.h diff --git a/mindspore/ccsrc/parallel/graph_util/graph_info.cc b/mindspore/ccsrc/frontend/parallel/graph_util/graph_info.cc similarity index 97% rename from mindspore/ccsrc/parallel/graph_util/graph_info.cc rename to mindspore/ccsrc/frontend/parallel/graph_util/graph_info.cc index 175413c0fd7..45a88c3a237 100644 --- a/mindspore/ccsrc/parallel/graph_util/graph_info.cc +++ b/mindspore/ccsrc/frontend/parallel/graph_util/graph_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/graph_util/graph_info.h" +#include "frontend/parallel/graph_util/graph_info.h" #include "debug/anf_ir_dump.h" #include "debug/anf_ir_utils.h" #include "debug/draw.h" diff --git a/mindspore/ccsrc/parallel/graph_util/graph_info.h b/mindspore/ccsrc/frontend/parallel/graph_util/graph_info.h similarity index 100% rename from mindspore/ccsrc/parallel/graph_util/graph_info.h rename to mindspore/ccsrc/frontend/parallel/graph_util/graph_info.h diff --git a/mindspore/ccsrc/parallel/graph_util/node_info.cc b/mindspore/ccsrc/frontend/parallel/graph_util/node_info.cc similarity index 78% rename from mindspore/ccsrc/parallel/graph_util/node_info.cc rename to mindspore/ccsrc/frontend/parallel/graph_util/node_info.cc index 7298b068322..e50df2818bf 100644 --- a/mindspore/ccsrc/parallel/graph_util/node_info.cc +++ b/mindspore/ccsrc/frontend/parallel/graph_util/node_info.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "parallel/graph_util/node_info.h" +#include "frontend/parallel/graph_util/node_info.h" #include #include "ir/anf.h" -#include "ir/param_value_py.h" -#include "pipeline/parse/python_adapter.h" +#include "ir/param_value.h" +#include "pipeline/jit/parse/python_adapter.h" namespace mindspore { namespace parallel { @@ -38,8 +38,7 @@ bool ParameterRequireGrad(const AnfNodePtr &node_ptr) { if (!para_ptr->has_default()) { return false; } - auto param_value = std::dynamic_pointer_cast(para_ptr->default_param()); - return py::cast(parse::python_adapter::GetPyObjAttr(param_value->value(), "requires_grad")); + return para_ptr->default_param()->requires_grad(); } } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/parallel/graph_util/node_info.h b/mindspore/ccsrc/frontend/parallel/graph_util/node_info.h similarity index 97% rename from mindspore/ccsrc/parallel/graph_util/node_info.h rename to mindspore/ccsrc/frontend/parallel/graph_util/node_info.h index bda268e582f..6037c466cdb 100644 --- 
a/mindspore/ccsrc/parallel/graph_util/node_info.h +++ b/mindspore/ccsrc/frontend/parallel/graph_util/node_info.h @@ -18,7 +18,7 @@ #define MINDSPORE_CCSRC_PARALLEL_GRAPH_UTIL_NODE_INFO_H_ #include -#include "ir/base.h" +#include "base/base.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/group_manager.cc b/mindspore/ccsrc/frontend/parallel/group_manager.cc similarity index 97% rename from mindspore/ccsrc/parallel/group_manager.cc rename to mindspore/ccsrc/frontend/parallel/group_manager.cc index 1562cbc140c..8929af7b0b2 100644 --- a/mindspore/ccsrc/parallel/group_manager.cc +++ b/mindspore/ccsrc/frontend/parallel/group_manager.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "parallel/group_manager.h" +#include "frontend/parallel/group_manager.h" #include #include -#include "parallel/device_manager.h" -#include "parallel/ops_info/ops_utils.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/ops_info/ops_utils.h" #include "utils/comm_manager.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/group_manager.h b/mindspore/ccsrc/frontend/parallel/group_manager.h similarity index 96% rename from mindspore/ccsrc/parallel/group_manager.h rename to mindspore/ccsrc/frontend/parallel/group_manager.h index f763d483ccb..b9cf9663b04 100644 --- a/mindspore/ccsrc/parallel/group_manager.h +++ b/mindspore/ccsrc/frontend/parallel/group_manager.h @@ -22,8 +22,8 @@ #include #include -#include "parallel/device.h" -#include "parallel/status.h" +#include "frontend/parallel/device.h" +#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/node_check.cc b/mindspore/ccsrc/frontend/parallel/node_check.cc similarity index 97% rename from mindspore/ccsrc/parallel/node_check.cc rename to mindspore/ccsrc/frontend/parallel/node_check.cc index 6b920f82ec6..de29417a4d5 100644 --- a/mindspore/ccsrc/parallel/node_check.cc +++ 
b/mindspore/ccsrc/frontend/parallel/node_check.cc @@ -14,12 +14,12 @@ * limitations under the License. */ -#include "parallel/node_check.h" +#include "frontend/parallel/node_check.h" #include #include -#include "parallel/ops_info/ops_utils.h" +#include "frontend/parallel/ops_info/ops_utils.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/node_check.h b/mindspore/ccsrc/frontend/parallel/node_check.h similarity index 100% rename from mindspore/ccsrc/parallel/node_check.h rename to mindspore/ccsrc/frontend/parallel/node_check.h diff --git a/mindspore/ccsrc/parallel/ops_info/activation_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.cc similarity index 99% rename from mindspore/ccsrc/parallel/ops_info/activation_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/activation_info.cc index 6bc33677a6a..35cac1480cf 100644 --- a/mindspore/ccsrc/parallel/ops_info/activation_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/activation_info.h" +#include "frontend/parallel/ops_info/activation_info.h" #include #include @@ -22,9 +22,9 @@ #include #include "ir/value.h" -#include "parallel/auto_parallel/costmodel.h" -#include "parallel/device_matrix.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/costmodel.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/activation_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.h similarity index 98% rename from mindspore/ccsrc/parallel/ops_info/activation_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/activation_info.h index cd66bf8e8b6..a74707efbe1 100644 --- a/mindspore/ccsrc/parallel/ops_info/activation_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/activation_info.h @@ -23,9 +23,9 @@ #include #include -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/arithmetic_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/arithmetic_info.cc similarity index 98% rename from mindspore/ccsrc/parallel/ops_info/arithmetic_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/arithmetic_info.cc index 02c26ea965b..1dd9c899ca8 100644 --- a/mindspore/ccsrc/parallel/ops_info/arithmetic_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/arithmetic_info.cc @@ -14,16 +14,16 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/arithmetic_info.h" +#include "frontend/parallel/ops_info/arithmetic_info.h" #include #include #include #include -#include "parallel/device_matrix.h" -#include "parallel/strategy.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/arithmetic_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/arithmetic_info.h similarity index 97% rename from mindspore/ccsrc/parallel/ops_info/arithmetic_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/arithmetic_info.h index 27caacc30cd..1d347e4ec1c 100644 --- a/mindspore/ccsrc/parallel/ops_info/arithmetic_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/arithmetic_info.h @@ -23,9 +23,9 @@ #include #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/batch_parallel_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/batch_parallel_info.cc similarity index 97% rename from mindspore/ccsrc/parallel/ops_info/batch_parallel_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/batch_parallel_info.cc index dac3b0a6759..64aceb90f67 100644 --- a/mindspore/ccsrc/parallel/ops_info/batch_parallel_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/batch_parallel_info.cc @@ -14,16 +14,16 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/batch_parallel_info.h" +#include "frontend/parallel/ops_info/batch_parallel_info.h" #include #include #include #include "ir/value.h" -#include "parallel/device_manager.h" -#include "parallel/device_matrix.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/batch_parallel_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/batch_parallel_info.h similarity index 96% rename from mindspore/ccsrc/parallel/ops_info/batch_parallel_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/batch_parallel_info.h index db6cb206d51..0ba30c385a2 100644 --- a/mindspore/ccsrc/parallel/ops_info/batch_parallel_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/batch_parallel_info.h @@ -22,8 +22,8 @@ #include #include #include "ir/value.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/bias_add_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/bias_add_info.cc similarity index 97% rename from mindspore/ccsrc/parallel/ops_info/bias_add_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/bias_add_info.cc index 005edaf7c73..e8b3afba164 100644 --- a/mindspore/ccsrc/parallel/ops_info/bias_add_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/bias_add_info.cc @@ -14,16 +14,16 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/bias_add_info.h" +#include "frontend/parallel/ops_info/bias_add_info.h" #include #include #include #include -#include "parallel/device_matrix.h" -#include "parallel/strategy.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/bias_add_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/bias_add_info.h similarity index 92% rename from mindspore/ccsrc/parallel/ops_info/bias_add_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/bias_add_info.h index 37f555a258c..3ede65a3ba9 100644 --- a/mindspore/ccsrc/parallel/ops_info/bias_add_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/bias_add_info.h @@ -24,9 +24,9 @@ #include #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/comparison_function_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/comparison_function_info.h similarity index 93% rename from mindspore/ccsrc/parallel/ops_info/comparison_function_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/comparison_function_info.h index 8dd2976b049..28298898466 100644 --- a/mindspore/ccsrc/parallel/ops_info/comparison_function_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/comparison_function_info.h @@ -22,9 +22,9 @@ #include #include #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/arithmetic_info.h" -#include "parallel/strategy.h" +#include 
"frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/arithmetic_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/dropout_do_mask_info.cc similarity index 97% rename from mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/dropout_do_mask_info.cc index e88868c772d..3b411ccb0ea 100644 --- a/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/dropout_do_mask_info.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "parallel/ops_info/dropout_do_mask_info.h" +#include "frontend/parallel/ops_info/dropout_do_mask_info.h" #include #include @@ -22,11 +22,11 @@ #include #include "ir/value.h" -#include "pipeline/resource.h" -#include "parallel/auto_parallel/costmodel.h" -#include "parallel/device_matrix.h" -#include "parallel/strategy.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "pipeline/jit/resource.h" +#include "frontend/parallel/auto_parallel/costmodel.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/dropout_do_mask_info.h similarity index 93% rename from mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/dropout_do_mask_info.h index c51a0a95135..ea7d5900711 100644 --- a/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/dropout_do_mask_info.h @@ -23,9 +23,9 @@ #include #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" 
-#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/elementary_function_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/elementary_function_info.h similarity index 93% rename from mindspore/ccsrc/parallel/ops_info/elementary_function_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/elementary_function_info.h index 2172c5cd89f..e25da9e7430 100644 --- a/mindspore/ccsrc/parallel/ops_info/elementary_function_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/elementary_function_info.h @@ -21,9 +21,9 @@ #include #include #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/activation_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/gather_v2_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_info.cc similarity index 98% rename from mindspore/ccsrc/parallel/ops_info/gather_v2_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_info.cc index 078be08128e..4e6e947f687 100644 --- a/mindspore/ccsrc/parallel/ops_info/gather_v2_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/gather_v2_info.h" +#include "frontend/parallel/ops_info/gather_v2_info.h" #include #include @@ -22,10 +22,10 @@ #include "ir/tensor.h" #include "ir/value.h" -#include "parallel/auto_parallel/costmodel.h" -#include "parallel/device_matrix.h" -#include "parallel/graph_util/generate_graph.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/costmodel.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/graph_util/generate_graph.h" +#include "frontend/parallel/strategy.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/ops_info/gather_v2_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_info.h similarity index 94% rename from mindspore/ccsrc/parallel/ops_info/gather_v2_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_info.h index f7aeb6a0d9f..b3dc0fab87e 100644 --- a/mindspore/ccsrc/parallel/ops_info/gather_v2_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_info.h @@ -23,9 +23,9 @@ #include #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/gather_v2_p_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_p_info.cc similarity index 77% rename from mindspore/ccsrc/parallel/ops_info/gather_v2_p_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_p_info.cc index 9fb8df08838..eb3c9900f82 100644 --- a/mindspore/ccsrc/parallel/ops_info/gather_v2_p_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_p_info.cc @@ -14,37 +14,39 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/gather_v2_p_info.h" +#include "frontend/parallel/ops_info/gather_v2_p_info.h" #include #include #include #include +#include -#include "parallel/device_matrix.h" -#include "parallel/graph_util/generate_graph.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/graph_util/generate_graph.h" namespace mindspore { namespace parallel { Status GatherV2PInfo::GetAttrs() { - // get axis, the third input is the axis, is a ValueNode - if (input_value_.at(2) == nullptr) { - MS_LOG(ERROR) << name_ << ": the third input value is nullptr, is not a ValueNode!"; - return FAILED; + // get axis, the third input is the axis, is a ValueNode, embeddinglookup doesn't have axis. + if (target_ != CPU) { + if (input_value_.at(2) == nullptr) { + MS_LOG(ERROR) << name_ << ": the third input value is nullptr, is not a ValueNode!"; + return FAILED; + } + auto axis = GetValue(input_value_.at(2)); + // if axis is negative then convert it to positive + auto params_shape = inputs_shape_.at(0); + if (params_shape.size() == 0) { + MS_LOG(ERROR) << name_ << ": params can not be a scalar!"; + return FAILED; + } + if (axis < 0) { + axis += SizeToInt(inputs_shape_[0].size()); + } + axis_ = axis; } - auto axis = GetValue(input_value_.at(2)); - // if axis is negative then convert it to positive - auto params_shape = inputs_shape_.at(0); - if (params_shape.size() == 0) { - MS_LOG(ERROR) << name_ << ": params can not be a scalar!"; - return FAILED; - } - if (axis < 0) { - axis += SizeToInt(inputs_shape_[0].size()); - } - axis_ = axis; - // get target auto target_iter = attrs_.find(TARGET); if (target_iter != attrs_.end()) { MS_EXCEPTION_IF_NULL(target_iter->second); @@ -52,13 +54,54 @@ Status GatherV2PInfo::GetAttrs() { target_ = target_iter->second->cast()->value(); } else { MS_LOG(ERROR) << name_ << " : The value of target is not a string."; - return FAILED; + } + } + auto manual_split_iter = attrs_.find("manual_split"); + if (manual_split_iter != 
attrs_.end()) { + param_split_shapes_.clear(); + manual_split_ = true; + auto var = manual_split_iter->second->cast(); + MS_LOG(DEBUG) << "Extract manual split strategy " << manual_split_iter->second->ToString(); + + if (var->size() > 0) { + std::vector elements = var->value(); + for (auto &ele : elements) { + if (ele->isa()) { + auto value_tuple = ele->cast(); + std::vector value_vector = value_tuple->value(); + if (value_vector.size() != 2) { + MS_LOG(ERROR) << "Failure: Size of manual_split element must be 2."; + return FAILED; + } + param_split_shapes_.push_back(static_cast(GetValue(value_vector[0]))); + index_offsets_.push_back(static_cast(GetValue(value_vector[1]))); + } else { + MS_LOG(ERROR) << "Failure: Manual split strategy's format is wrong! Need ValueSequeue"; + return FAILED; + } + } + + if (param_split_shapes_.empty()) { + MS_LOG(ERROR) << "Failed to extract param split strategy."; + return FAILED; + } } } - // target=CPU, axis must be 0 - if (target_ == "CPU" && axis_ != 0) { - MS_LOG(ERROR) << name_ << ": target is CPU, axis must be 0, but got " << axis_; + return SUCCESS; +} + +Status GatherV2PInfo::CheckManualSplit() { + auto param_shape = inputs_shape_.at(0); + int32_t split_shape_sum = std::accumulate(param_split_shapes_.begin(), param_split_shapes_.end(), 0, + [](int32_t s, int32_t shape) { return s + shape; }); + if (split_shape_sum < param_shape.at(0)) { + MS_LOG(ERROR) << "Failure: Sum of splited shapes should not be smaller than param_shape."; + return FAILED; + } + + if (std::any_of(index_offsets_.begin(), index_offsets_.end(), [](const int32_t &offset) { return offset < 0; })) { + MS_LOG(ERROR) << "Failure: Index offset must not less than 0."; return FAILED; } @@ -103,6 +146,14 @@ Status GatherV2PInfo::CheckStrategy(const StrategyPtr &strategy) { return FAILED; } + if (manual_split_) { + if (CheckManualSplit() != SUCCESS) { + return FAILED; + } + // when using manual_split, no need to check belowings. 
+ return SUCCESS; + } + // axis != 0, param_shape(0)%(param_strategy(0)*param_strategy(axis)) must be 0 if (axis_ != 0 && param_shape.at(0) % (param_strategy.at(0) * param_strategy.at(IntToSize(axis_))) != 0) { MS_LOG(DEBUG) << name_ << ": index_shape(0) can't be divided by (param_strategy(0)*param_strategy(axis))."; @@ -130,6 +181,11 @@ Status GatherV2PInfo::CheckStrategy(const StrategyPtr &strategy) { } Status GatherV2PInfo::InferMirrorOps() { + // There is no mirror operators for manual split + if (manual_split_) { + return SUCCESS; + } + mirror_ops_.clear(); Shape input_a_tensor_map = inputs_tensor_map_.at(0); std::vector input_a_group; @@ -160,6 +216,13 @@ Status GatherV2PInfo::InferDevMatrixShape() { // infer input dev_matrix_shape auto param_strategy = strategy_->GetInputDim().at(0); auto index_strategy = strategy_->GetInputDim().at(1); + + if (manual_split_) { + dev_matrix_shape_ = param_strategy; + out_dev_matrix_shape_ = dev_matrix_shape_; + return SUCCESS; + } + dev_matrix_shape_ = param_strategy; // param_strategy(axis)!=1, @@ -195,6 +258,12 @@ Status GatherV2PInfo::InferDevMatrixShape() { } Status GatherV2PInfo::InferTensorMap() { + if (manual_split_) { + inputs_tensor_map_.push_back({1, 0}); + inputs_tensor_map_.push_back({-1, 1}); + outputs_tensor_map_.push_back({-1, 1, 0}); + return SUCCESS; + } // infer input tensor map // param_strategy(axis) != 1 size_t param_size = inputs_shape_.at(0).size(); @@ -261,8 +330,13 @@ Status GatherV2PInfo::InferTensorInfo() { Shape input_shape = inputs_shape_.at(0); Shape input_index_shape = inputs_shape_.at(1); Shape output_shape = outputs_shape_.at(0); + int32_t rank = g_device_manager->global_rank(); // infer tensor layout TensorLayout input_tensor_layout, input_index_layout, output_tensor_layout; + if (manual_split_) { + input_shape[0] = param_split_shapes_[rank / dev_matrix_shape_[1]]; + input_shape[0] = input_shape[0] * dev_matrix_shape_[0]; + } if ((input_tensor_layout.InitFromVector(dev_matrix_shape_, 
inputs_tensor_map_.at(0), input_shape) != SUCCESS) || (input_index_layout.InitFromVector(dev_matrix_shape_, inputs_tensor_map_.at(1), input_index_shape) != SUCCESS) || (output_tensor_layout.InitFromVector(out_dev_matrix_shape_, outputs_tensor_map_.at(0), output_shape) != @@ -274,6 +348,9 @@ Status GatherV2PInfo::InferTensorInfo() { TensorInfo input_index_info(input_index_layout); TensorInfo output_tensor_info(output_tensor_layout); + Shape slice_shape = input_tensor_info.slice_shape(); + MS_LOG(DEBUG) << "The fake slice shape is: " << ShapeToString(slice_shape); + inputs_tensor_info_.push_back(input_tensor_info); inputs_tensor_info_.push_back(input_index_info); outputs_tensor_info_.push_back(output_tensor_info); @@ -312,6 +389,19 @@ Status GatherV2PInfo::InferBias() { return FAILED; } +Status GatherV2PInfo::InferOffset() { + CheckGlobalDeviceManager(); + size_t rank = g_device_manager->global_rank(); + if (rank < index_offsets_.size()) { + index_offset_ = index_offsets_.at(rank); + MS_LOG(DEBUG) << name_ << ": Device rank " << rank << ", Index Offset: " << index_offset_; + return SUCCESS; + } + + MS_LOG(ERROR) << name_ << ": Get index offset failed, index offset size is" << index_offsets_.size(); + return FAILED; +} + Status GatherV2PInfo::InferGroup() { auto param_strategy = strategy_->GetInputDim().at(0); size_t dim = IntToSize(axis_); @@ -348,12 +438,9 @@ std::vector GetRankFromGroup(const Group &group) { Status GatherV2PInfo::InferForwardCommunication() { forward_op_.clear(); - if (target_ != CPU) { - return SUCCESS; - } auto param_strategy = strategy_->GetInputDim().at(0); - // don't split axis, no need forward communication - if (param_strategy.at(IntToSize(axis_)) == 1) { + // don't split axis or target is not CPU, no need forward communication + if (target_ != CPU || param_strategy.at(IntToSize(axis_)) == 1) { return SUCCESS; } // split axis @@ -362,38 +449,13 @@ Status GatherV2PInfo::InferForwardCommunication() { MS_LOG(ERROR) << name_ << ": Infer Group 
failed."; return FAILED; } - auto group_size = group_.GetDevNum(); Attr attr_group; - if (host_reduce_scatter_) { - // group size <= 8 - std::vector rank_list; - if (group_size <= 8) { - reduce_scatter_flag_ = false; - operator_name = HOST_REDUCE_SCATTER; - rank_list = GetRankFromGroup(group_); - attr_group = std::make_pair(GROUP, MakeValue(rank_list)); - } else { - // group size > 8, don't support host reduce_scatter - reduce_scatter_flag_ = true; - split_num_ = SizeToInt(group_size / 8); - CheckGlobalDeviceManager(); - operator_name = REDUCE_SCATTER; - int32_t rank = g_device_manager->global_rank(); - size_t repeat = group_size / 8; - for (size_t i = 0; i < repeat; ++i) { - rank_list.push_back(rank + SizeToInt(i * 8)); - } - Group g = g_device_manager->CreateGroup(rank_list); - attr_group = std::make_pair(GROUP, MakeValue(g.name())); - } - } else { - operator_name = REDUCE_SCATTER; - if (InferGroup() != SUCCESS) { - MS_LOG(ERROR) << name_ << ": Infer Group failed."; - return FAILED; - } - attr_group = std::make_pair(GROUP, MakeValue(group_.name())); + operator_name = REDUCE_SCATTER; + if (InferGroup() != SUCCESS) { + MS_LOG(ERROR) << name_ << ": Infer Group failed."; + return FAILED; } + attr_group = std::make_pair(GROUP, MakeValue(group_.name())); Attr attr_op = std::make_pair(OP, MakeValue(REDUCE_OP_SUM)); OperatorAttrs attrs = {attr_op, attr_group}; OperatorParams params; @@ -410,6 +472,19 @@ Status GatherV2PInfo::ComputeReplaceGraph(const CNodePtr &cnode) { MS_LOG(ERROR) << "GenerateGraph Init failed"; return FAILED; } + if (manual_split_) { + if (InferOffset() != SUCCESS) { + MS_LOG(ERROR) << name_ << ": Infer Bias failed."; + return FAILED; + } + auto sub = gen_g.PushBack({gen_g.NewOpInst(SUB), gen_g.virtual_input_node(), CreateInt32Tensor(index_offset_)}); + auto gather_v2 = + gen_g.PushBack({gen_g.NewOpInst(replace_op_name_), gen_g.virtual_input_node(), sub, CreatInt32Imm(axis_)}); + std::vector> input_nodes = {std::make_pair(sub, 2), 
std::make_pair(gather_v2, 1)}; + replace_graph_ = std::make_shared>, AnfNodePtr>>( + std::make_pair(input_nodes, gather_v2)); + return SUCCESS; + } if (InferBias() != SUCCESS) { MS_LOG(ERROR) << name_ << ": Infer Bias failed."; return FAILED; @@ -444,6 +519,14 @@ Status GatherV2PInfo::ComputeReplaceGraph(const CNodePtr &cnode) { } ReplaceGraphPtr GatherV2PInfo::replace_graph(const CNodePtr &cnode) { + if (manual_split_) { + if (ComputeReplaceGraph(cnode) != SUCCESS) { + MS_LOG(ERROR) << name_ << ": ComputeReplaceGraph failed."; + return nullptr; + } + return replace_graph_; + } + auto param_strategy = strategy_->GetInputDim().at(0); // target_ == CPU, no need to raplace graph if (target_ == CPU) { @@ -464,10 +547,7 @@ Status GatherV2PInfo::ComputeReplaceOp() { OperatorName op_name = EMBEDDING_LOOKUP; OperatorAttrs attrs; Attr param_offset = std::make_pair("offset", MakeValue(bias_)); - Attr param_flag = std::make_pair("reduce_scatter_flag", MakeValue(reduce_scatter_flag_)); - Attr param_split_num = std::make_pair("split_num", MakeValue(split_num_)); - OperatorParams params = {std::make_pair(param_offset, 3), std::make_pair(param_flag, 4), - std::make_pair(param_split_num, 5)}; + OperatorParams params = {std::make_pair(param_offset, 3)}; OperatorArgs args = std::make_pair(attrs, params); Operator op = std::make_pair(op_name, args); replace_op_.push_back(op); diff --git a/mindspore/ccsrc/parallel/ops_info/gather_v2_p_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_p_info.h similarity index 78% rename from mindspore/ccsrc/parallel/ops_info/gather_v2_p_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_p_info.h index 83868606d14..eb26c616d0f 100644 --- a/mindspore/ccsrc/parallel/ops_info/gather_v2_p_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_p_info.h @@ -23,9 +23,9 @@ #include #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include 
"parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { @@ -36,6 +36,7 @@ class GatherV2PInfo : public OperatorInfo { : OperatorInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared()), axis_(0), bias_(0), + index_offset_(0), slice_size_(0) {} ~GatherV2PInfo() override = default; Status Init(const StrategyPtr &strategy) override; @@ -57,20 +58,23 @@ class GatherV2PInfo : public OperatorInfo { private: Status ComputeReplaceGraph(const CNodePtr &cnode); + Status CheckManualSplit(); Status ComputeReplaceOp(); Status InferBias(); + Status InferOffset(); Status InferGroup(); int32_t axis_; - std::string target_; + std::string target_ = DEVICE; std::string replace_op_name_ = GATHERV2; int32_t bias_; + int32_t index_offset_; int32_t slice_size_; Shape out_dev_matrix_shape_; Group group_; - bool reduce_scatter_flag_ = false; - int32_t split_num_ = 1; - bool host_reduce_scatter_ = false; + bool manual_split_ = false; + std::vector param_split_shapes_; + std::vector index_offsets_; }; class SparseGatherV2Info : public GatherV2PInfo { @@ -83,6 +87,14 @@ class SparseGatherV2Info : public GatherV2PInfo { private: std::string replace_op_name_ = SPARSE_GATHERV2; }; + +class EmbeddingLookupInfo : public GatherV2PInfo { + public: + EmbeddingLookupInfo(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape, + const PrimitiveAttrs &attrs) + : GatherV2PInfo(name, inputs_shape, outputs_shape, attrs) {} + ~EmbeddingLookupInfo() override = default; +}; } // namespace parallel } // namespace mindspore #endif // MINDSPORE_CCSRC_PARALLEL_OPS_INFO_GATHER_V2_P_INFO_H_ diff --git a/mindspore/ccsrc/parallel/ops_info/get_next_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/get_next_info.cc similarity index 97% rename from mindspore/ccsrc/parallel/ops_info/get_next_info.cc rename to 
mindspore/ccsrc/frontend/parallel/ops_info/get_next_info.cc index 0fb49364f0a..36067321569 100644 --- a/mindspore/ccsrc/parallel/ops_info/get_next_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/get_next_info.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "parallel/ops_info/get_next_info.h" +#include "frontend/parallel/ops_info/get_next_info.h" #include #include @@ -22,10 +22,10 @@ #include #include "ir/value.h" -#include "parallel/device_matrix.h" -#include "parallel/strategy.h" -#include "parallel/context.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/context.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/get_next_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/get_next_info.h similarity index 93% rename from mindspore/ccsrc/parallel/ops_info/get_next_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/get_next_info.h index ba209910b73..36e7a0fcb3f 100644 --- a/mindspore/ccsrc/parallel/ops_info/get_next_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/get_next_info.h @@ -22,9 +22,9 @@ #include #include -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/l2_normalize_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/l2_normalize_info.cc similarity index 94% rename from mindspore/ccsrc/parallel/ops_info/l2_normalize_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/l2_normalize_info.cc index 8716997d9f9..126fdcf84e6 100644 --- 
a/mindspore/ccsrc/parallel/ops_info/l2_normalize_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/l2_normalize_info.cc @@ -14,16 +14,16 @@ * limitations under the License. */ -#include "parallel/ops_info/l2_normalize_info.h" +#include "frontend/parallel/ops_info/l2_normalize_info.h" #include #include #include #include -#include "parallel/device_matrix.h" -#include "parallel/strategy.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/l2_normalize_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/l2_normalize_info.h similarity index 90% rename from mindspore/ccsrc/parallel/ops_info/l2_normalize_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/l2_normalize_info.h index ca063d01d8a..c74dde4b4bb 100644 --- a/mindspore/ccsrc/parallel/ops_info/l2_normalize_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/l2_normalize_info.h @@ -23,9 +23,9 @@ #include #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/activation_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/layer_norm_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/layer_norm_info.cc similarity index 98% rename from mindspore/ccsrc/parallel/ops_info/layer_norm_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/layer_norm_info.cc index 5bdd24090fe..62d7c6d61e8 100644 --- a/mindspore/ccsrc/parallel/ops_info/layer_norm_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/layer_norm_info.cc @@ -14,11 +14,11 @@ * limitations under 
the License. */ -#include "parallel/ops_info/layer_norm_info.h" +#include "frontend/parallel/ops_info/layer_norm_info.h" #include #include -#include "parallel/device_matrix.h" -#include "parallel/strategy.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/layer_norm_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/layer_norm_info.h similarity index 94% rename from mindspore/ccsrc/parallel/ops_info/layer_norm_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/layer_norm_info.h index 50117b81853..9ee11bb215f 100644 --- a/mindspore/ccsrc/parallel/ops_info/layer_norm_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/layer_norm_info.h @@ -22,9 +22,9 @@ #include #include #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/loss_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/loss_info.cc similarity index 97% rename from mindspore/ccsrc/parallel/ops_info/loss_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/loss_info.cc index 0ba325c0cd5..889f204fb0b 100644 --- a/mindspore/ccsrc/parallel/ops_info/loss_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/loss_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/loss_info.h" +#include "frontend/parallel/ops_info/loss_info.h" #include #include @@ -22,9 +22,9 @@ #include #include "ir/value.h" -#include "parallel/device_matrix.h" -#include "parallel/strategy.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/loss_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/loss_info.h similarity index 94% rename from mindspore/ccsrc/parallel/ops_info/loss_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/loss_info.h index 2679c2d62b4..7e5478bedf0 100644 --- a/mindspore/ccsrc/parallel/ops_info/loss_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/loss_info.h @@ -23,9 +23,9 @@ #include #include "ir/value.h" -#include "parallel/ops_info/activation_info.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/matmul_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/matmul_info.cc similarity index 98% rename from mindspore/ccsrc/parallel/ops_info/matmul_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/matmul_info.cc index 7d1ab8dc0fa..60a3d60b392 100644 --- a/mindspore/ccsrc/parallel/ops_info/matmul_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/matmul_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/matmul_info.h" +#include "frontend/parallel/ops_info/matmul_info.h" #include #include @@ -24,10 +24,10 @@ #include #include "ir/value.h" -#include "parallel/auto_parallel/graph_costmodel.h" -#include "parallel/device_manager.h" -#include "parallel/device_matrix.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/matmul_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/matmul_info.h similarity index 95% rename from mindspore/ccsrc/parallel/ops_info/matmul_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/matmul_info.h index cb3e54a0489..d4e144c2b64 100644 --- a/mindspore/ccsrc/parallel/ops_info/matmul_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/matmul_info.h @@ -24,9 +24,9 @@ #include "common/utils.h" #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/onehot_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/onehot_info.cc similarity index 97% rename from mindspore/ccsrc/parallel/ops_info/onehot_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/onehot_info.cc index ea2d0451040..15acb085f55 100644 --- a/mindspore/ccsrc/parallel/ops_info/onehot_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/onehot_info.cc @@ -14,17 +14,17 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/onehot_info.h" +#include "frontend/parallel/ops_info/onehot_info.h" #include #include #include #include "ir/value.h" -#include "parallel/auto_parallel/costmodel.h" -#include "parallel/device_matrix.h" -#include "parallel/graph_util/generate_graph.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/costmodel.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/graph_util/generate_graph.h" +#include "frontend/parallel/strategy.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/ops_info/onehot_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/onehot_info.h similarity index 93% rename from mindspore/ccsrc/parallel/ops_info/onehot_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/onehot_info.h index 3c8a64f9542..dfd7e6cbafb 100644 --- a/mindspore/ccsrc/parallel/ops_info/onehot_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/onehot_info.h @@ -23,9 +23,9 @@ #include #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/operator_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc similarity index 99% rename from mindspore/ccsrc/parallel/ops_info/operator_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc index f9b294898cb..3dd47b1de69 100644 --- a/mindspore/ccsrc/parallel/ops_info/operator_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/operator_info.h" +#include "frontend/parallel/ops_info/operator_info.h" #include #include @@ -27,9 +27,9 @@ #include "ir/dtype.h" #include "ir/tensor.h" #include "ir/value.h" -#include "parallel/auto_parallel/edge_costmodel.h" -#include "parallel/auto_parallel/graph_costmodel.h" -#include "parallel/context.h" +#include "frontend/parallel/auto_parallel/edge_costmodel.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/context.h" #include "utils/context/ms_context.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/parallel/ops_info/operator_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/operator_info.h similarity index 97% rename from mindspore/ccsrc/parallel/ops_info/operator_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/operator_info.h index 21041c3e94b..8641c474913 100644 --- a/mindspore/ccsrc/parallel/ops_info/operator_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/operator_info.h @@ -26,15 +26,15 @@ #include #include "common/utils.h" -#include "ir/base.h" -#include "parallel/auto_parallel/costmodel.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/device_manager.h" -#include "parallel/device_matrix.h" -#include "parallel/group_manager.h" -#include "parallel/ops_info/ops_utils.h" -#include "parallel/strategy.h" -#include "parallel/tensor_layout/tensor_info.h" +#include "base/base.h" +#include "frontend/parallel/auto_parallel/costmodel.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/group_manager.h" +#include "frontend/parallel/ops_info/ops_utils.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/tensor_layout/tensor_info.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/ops_info_head_files.h 
b/mindspore/ccsrc/frontend/parallel/ops_info/ops_info_head_files.h new file mode 100644 index 00000000000..bc732ed2340 --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ops_info/ops_info_head_files.h @@ -0,0 +1,41 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_PARALLEL_OPS_INFO_OPS_INFO_HEAD_FILES_H_ +#define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_OPS_INFO_HEAD_FILES_H_ + +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/ops_info/arithmetic_info.h" +#include "frontend/parallel/ops_info/batch_parallel_info.h" +#include "frontend/parallel/ops_info/bias_add_info.h" +#include "frontend/parallel/ops_info/comparison_function_info.h" +#include "frontend/parallel/ops_info/dropout_do_mask_info.h" +#include "frontend/parallel/ops_info/elementary_function_info.h" +#include "frontend/parallel/ops_info/gather_v2_info.h" +#include "frontend/parallel/ops_info/get_next_info.h" +#include "frontend/parallel/ops_info/l2_normalize_info.h" +#include "frontend/parallel/ops_info/layer_norm_info.h" +#include "frontend/parallel/ops_info/loss_info.h" +#include "frontend/parallel/ops_info/matmul_info.h" +#include "frontend/parallel/ops_info/onehot_info.h" +#include "frontend/parallel/ops_info/prelu_info.h" +#include "frontend/parallel/ops_info/reduce_method_info.h" +#include "frontend/parallel/ops_info/reshape_info.h" +#include 
"frontend/parallel/ops_info/transpose_info.h" +#include "frontend/parallel/ops_info/virtual_dataset_info.h" +#include "frontend/parallel/ops_info/gather_v2_p_info.h" + +#endif // MINDSPORE_CCSRC_PARALLEL_OPS_INFO_HEAD_FILES_H_ diff --git a/mindspore/ccsrc/parallel/ops_info/ops_utils.h b/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h similarity index 99% rename from mindspore/ccsrc/parallel/ops_info/ops_utils.h rename to mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h index 9cb3c7040af..79dfb56693b 100644 --- a/mindspore/ccsrc/parallel/ops_info/ops_utils.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h @@ -65,6 +65,7 @@ constexpr char STEP_PARALLEL_END[] = "step_parallel_end"; constexpr char STEP_AUTO_PARALLEL_BEGIN[] = "step_auto_parallel_begin.dot"; constexpr char REQUIRES_GRAD[] = "requires_grad"; constexpr char PARAM_NAME[] = "name"; +constexpr char RESHAPEINFO[] = "ReshapeInfo"; constexpr char RELU_TYPE[] = "relu"; constexpr char RELU6_TYPE[] = "relu6"; @@ -131,6 +132,7 @@ constexpr char REDISTRIBUTION_OP[] = "redistribution_op"; constexpr char DARA_PARALLEL[] = "data_parallel"; constexpr char FORWARD_REDUCE_SCATTER[] = "forward_reduce_scatter"; constexpr char OPTIMIZER_SUB_STRING[] = "optimizer"; +constexpr char DEVICE[] = "Device"; // Operator constexpr char VIRTUAL_DIV[] = "_VirtualDiv"; diff --git a/mindspore/ccsrc/parallel/ops_info/prelu_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/prelu_info.cc similarity index 97% rename from mindspore/ccsrc/parallel/ops_info/prelu_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/prelu_info.cc index 14483e97a14..57b35b69f7a 100644 --- a/mindspore/ccsrc/parallel/ops_info/prelu_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/prelu_info.cc @@ -14,15 +14,15 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/prelu_info.h" +#include "frontend/parallel/ops_info/prelu_info.h" #include #include #include -#include "parallel/device_manager.h" -#include "parallel/device_matrix.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/step_parallel.h" #include "utils/convert_utils.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/parallel/ops_info/prelu_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/prelu_info.h similarity index 95% rename from mindspore/ccsrc/parallel/ops_info/prelu_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/prelu_info.h index 28e149fad76..e6e5e23bacd 100644 --- a/mindspore/ccsrc/parallel/ops_info/prelu_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/prelu_info.h @@ -23,8 +23,8 @@ #include #include "ir/value.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/reduce_method_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/reduce_method_info.cc similarity index 98% rename from mindspore/ccsrc/parallel/ops_info/reduce_method_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/reduce_method_info.cc index 7304666a776..0488dceecad 100644 --- a/mindspore/ccsrc/parallel/ops_info/reduce_method_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/reduce_method_info.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/reduce_method_info.h" +#include "frontend/parallel/ops_info/reduce_method_info.h" #include #include @@ -22,9 +22,9 @@ #include #include "ir/value.h" -#include "parallel/device_manager.h" -#include "parallel/device_matrix.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/ops_info/reduce_method_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/reduce_method_info.h similarity index 96% rename from mindspore/ccsrc/parallel/ops_info/reduce_method_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/reduce_method_info.h index 796c7e457bb..ed9ab0721dd 100644 --- a/mindspore/ccsrc/parallel/ops_info/reduce_method_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/reduce_method_info.h @@ -24,9 +24,9 @@ #include "ir/tensor.h" #include "ir/value.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/activation_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/reshape_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/reshape_info.cc similarity index 98% rename from mindspore/ccsrc/parallel/ops_info/reshape_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/reshape_info.cc index 57e1a76d0ab..fb62c1d02c0 100644 --- a/mindspore/ccsrc/parallel/ops_info/reshape_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/reshape_info.cc @@ -14,15 +14,15 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/reshape_info.h" +#include "frontend/parallel/ops_info/reshape_info.h" #include #include -#include "parallel/device_manager.h" -#include "parallel/device_matrix.h" -#include "parallel/step_parallel.h" -#include "parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/step_parallel.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" #include "utils/convert_utils.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/parallel/ops_info/reshape_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/reshape_info.h similarity index 97% rename from mindspore/ccsrc/parallel/ops_info/reshape_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/reshape_info.h index 77a1f8e7f1d..2463b440f81 100644 --- a/mindspore/ccsrc/parallel/ops_info/reshape_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/reshape_info.h @@ -24,8 +24,8 @@ #include #include -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/tmp_identity_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/tmp_identity_info.cc similarity index 98% rename from mindspore/ccsrc/parallel/ops_info/tmp_identity_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/tmp_identity_info.cc index 772a4f83f69..ed6eaa89f1b 100644 --- a/mindspore/ccsrc/parallel/ops_info/tmp_identity_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/tmp_identity_info.cc @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/tmp_identity_info.h" +#include "frontend/parallel/ops_info/tmp_identity_info.h" #include #include diff --git a/mindspore/ccsrc/parallel/ops_info/tmp_identity_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/tmp_identity_info.h similarity index 93% rename from mindspore/ccsrc/parallel/ops_info/tmp_identity_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/tmp_identity_info.h index f7895d05112..7f73f811805 100644 --- a/mindspore/ccsrc/parallel/ops_info/tmp_identity_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/tmp_identity_info.h @@ -21,9 +21,9 @@ #include #include -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/transpose_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/transpose_info.cc similarity index 97% rename from mindspore/ccsrc/parallel/ops_info/transpose_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/transpose_info.cc index 49bbae0cb4e..b6bb875abc1 100644 --- a/mindspore/ccsrc/parallel/ops_info/transpose_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/transpose_info.cc @@ -14,14 +14,14 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/transpose_info.h" +#include "frontend/parallel/ops_info/transpose_info.h" #include #include -#include "parallel/device_manager.h" -#include "parallel/device_matrix.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/step_parallel.h" #include "utils/convert_utils.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/parallel/ops_info/transpose_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/transpose_info.h similarity index 95% rename from mindspore/ccsrc/parallel/ops_info/transpose_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/transpose_info.h index 50b76bde650..d3b62dc2349 100644 --- a/mindspore/ccsrc/parallel/ops_info/transpose_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/transpose_info.h @@ -23,8 +23,8 @@ #include #include "ir/value.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/virtual_dataset_info.cc similarity index 96% rename from mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.cc rename to mindspore/ccsrc/frontend/parallel/ops_info/virtual_dataset_info.cc index ce8b04d8028..3b89d7c84c9 100644 --- a/mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/virtual_dataset_info.cc @@ -14,16 +14,16 @@ * limitations under the License. 
*/ -#include "parallel/ops_info/virtual_dataset_info.h" +#include "frontend/parallel/ops_info/virtual_dataset_info.h" #include #include #include -#include "parallel/device_manager.h" -#include "parallel/device_matrix.h" -#include "parallel/step_parallel.h" -#include "parallel/context.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/step_parallel.h" +#include "frontend/parallel/context.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/virtual_dataset_info.h similarity index 95% rename from mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.h rename to mindspore/ccsrc/frontend/parallel/ops_info/virtual_dataset_info.h index 312ac7a6a47..fe54954be0b 100644 --- a/mindspore/ccsrc/parallel/ops_info/virtual_dataset_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/virtual_dataset_info.h @@ -23,8 +23,8 @@ #include #include "ir/value.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/strategy.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/frontend/parallel/ps/common.h b/mindspore/ccsrc/frontend/parallel/ps/common.h new file mode 100644 index 00000000000..5e136c816f2 --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ps/common.h @@ -0,0 +1,87 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_COMMON_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_COMMON_H_ + +#include +#include +#include +#include "ps/ps.h" + +namespace mindspore { +namespace parallel { +namespace ps { +constexpr char kEnvCommType[] = "MS_COMM_TYPE"; +constexpr char kEnvInterface[] = "MS_INTERFACE"; +constexpr char kEnvPServerNum[] = "MS_SERVER_NUM"; +constexpr char kEnvWorkerNum[] = "MS_WORKER_NUM"; +constexpr char kEnvSchedulerHost[] = "MS_SCHED_HOST"; +constexpr char kEnvSchedulerPort[] = "MS_SCHED_PORT"; + +constexpr char kEnvRole[] = "MS_ROLE"; +constexpr char kEnvRoleOfPServer[] = "MS_PSERVER"; +constexpr char kEnvRoleOfWorker[] = "MS_WORKER"; +constexpr char kEnvRoleOfScheduler[] = "MS_SCHED"; + +constexpr char kDmlcCommType[] = "DMLC_PS_VAN_TYPE"; +constexpr char kDmlcInterface[] = "DMLC_INTERFACE"; +constexpr char kDmlcPServerNum[] = "DMLC_NUM_SERVER"; +constexpr char kDmlcWorkerNum[] = "DMLC_NUM_WORKER"; +constexpr char kDmlcRole[] = "DMLC_ROLE"; +constexpr char kDmlcSchedulerHost[] = "DMLC_PS_ROOT_URI"; +constexpr char kDmlcSchedulerPort[] = "DMLC_PS_ROOT_PORT"; + +constexpr char kCommTypeOfIBVerbs[] = "ibverbs"; +constexpr char kCommTypeOfTCP[] = "zmq"; +constexpr char kRoleOfPServer[] = "server"; +constexpr char kRoleOfWorker[] = "worker"; +constexpr char kRoleOfScheduler[] = "scheduler"; + +constexpr char kLearningRate[] = "learning_rate"; +constexpr char kMomentum[] = "momentum"; + +constexpr char kApplyMomentum[] = "ApplyMomentum"; +constexpr char kSparseAdam[] = "Adam"; +constexpr char 
kSparseFtrl[] = "Ftrl"; + +constexpr int kInitWeightsCmd = 10; +constexpr int kInitWeightToOptimIdCmd = 11; +constexpr int kInitOptimInputsShapeCmd = 12; +constexpr int kInitEmbeddingsCmd = 20; +constexpr int kEmbeddingLookupCmd = 30; + +constexpr size_t kInvalidKey = UINT64_MAX; + +using Key = ::ps::Key; +using Keys = ::ps::SArray; +using Values = ::ps::SArray; +using ValuesPtr = std::shared_ptr; +using Weight = ::ps::SArray; +using Grad = ::ps::SArray; +using LookupIds = ::ps::SArray; +using Lengths = ::ps::SArray; +using WeightPtr = std::shared_ptr; +using GradPtr = std::shared_ptr; +// using EmbeddingTable = std::unordered_map; +// using EmbeddingTable = ::ps::SArray; +// using EmbeddingTablePtr = std::shared_ptr; +using InputsShape = std::vector>>; +using InputsShapePtr = std::shared_ptr>>>; +} // namespace ps +} // namespace parallel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_COMMON_H_ diff --git a/mindspore/ccsrc/frontend/parallel/ps/optimizer_info.cc b/mindspore/ccsrc/frontend/parallel/ps/optimizer_info.cc new file mode 100644 index 00000000000..e16c713e3c7 --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ps/optimizer_info.cc @@ -0,0 +1,184 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "frontend/parallel/ps/optimizer_info.h" +#include + +namespace mindspore { +namespace parallel { +namespace ps { +void OptimizerInfo::AddWorkspace(const AddressPtr &workspace) { workspaces_.push_back(workspace); } + +const std::vector &OptimizerInfo::inputs() { return inputs_; } + +const std::vector &OptimizerInfo::workspaces() { return workspaces_; } + +const std::vector &OptimizerInfo::outputs() { return outputs_; } + +bool OptimizerInfo::IsSparse() const { return false; } + +size_t OptimizerInfo::grad_index() { return 0; } + +size_t OptimizerInfo::indices_index() { return 0; } + +void OptimizerInfo::UpdateWeight(const WeightPtr &weight) { + AddressPtr weight_addr = std::make_shared(); + weight_addr->addr = weight->data(); + weight_addr->size = weight->size(); + inputs_[0] = weight_addr; +} + +void DenseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) { + float *accum_grad_data = reinterpret_cast(gradient()->addr); + size_t size = gradient()->size / sizeof(float); + size_t grad_index = this->grad_index(); + size_t grad_offset = 0; + for (size_t i = 0; i < grad_index; i++) { + grad_offset += lengths[i]; + } + float *grad_data = values.data() + grad_offset; + CHECK_EQ(size, static_cast(lengths[grad_index])); + + for (size_t i = 0; i < size; i++) { + accum_grad_data[i] += grad_data[i]; + } +} + +void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) { + // Append grad data to the end + float *accum_grad_data = reinterpret_cast(gradient()->addr); + + size_t grad_index = this->grad_index(); + size_t grad_offset = 0; + for (size_t i = 0; i < grad_index; i++) { + grad_offset += lengths[i]; + } + float *incr_grad_data = values.data() + grad_offset; + size_t incr_grad_size = lengths[grad_index] * sizeof(float); + + auto ret = memcpy_s(accum_grad_data + grads_offset_, incr_grad_size, incr_grad_data, incr_grad_size); + if (ret != 0) { + MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")"; + } + 
grads_offset_ += incr_grad_size; + gradient()->size += incr_grad_size; + + // Append indice data to the end + int *accum_indices_data = reinterpret_cast(indices()->addr); + + size_t indices_index = this->indices_index(); + size_t indice_offset = 0; + for (size_t i = 0; i < indices_index; i++) { + indice_offset += lengths[i]; + } + int *incr_indice_data = reinterpret_cast(values.data() + indice_offset); + size_t incr_indice_size = lengths[indices_index] * sizeof(float); + + auto ret2 = memcpy_s(accum_indices_data + indices_offset_, incr_indice_size, incr_indice_data, incr_indice_size); + if (ret2 != 0) { + MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret2 << ")"; + } + indices_offset_ += incr_indice_size; + indices()->size += incr_indice_size; +} + +void SparseOptimInfo::Reset() { + auto &gradient = this->gradient(); + gradient->size = 0; + auto &indices = this->indices(); + indices->size = 0; + grads_offset_ = 0; + indices_offset_ = 0; +} + +MomentumOptimInfo::MomentumOptimInfo(const AddressPtr &weight, const AddressPtr &accumulate, + const AddressPtr &learning_rate, const AddressPtr &gradient, + const AddressPtr &momentum) { + inputs_.push_back(weight); + inputs_.push_back(accumulate); + inputs_.push_back(learning_rate); + inputs_.push_back(gradient); + inputs_.push_back(momentum); +} + +const AddressPtr &MomentumOptimInfo::gradient() { return inputs_[3]; } + +const AddressPtr &MomentumOptimInfo::indices() { return inputs_[3]; } + +SparseAdamOptimInfo::SparseAdamOptimInfo(const AddressPtr &weight, const AddressPtr &m, const AddressPtr &v, + const AddressPtr &beta1_power, const AddressPtr &beta2_power, + const AddressPtr &learning_rate, const AddressPtr &beta1, + const AddressPtr &beta2, const AddressPtr &epsilon, const AddressPtr &grad, + const AddressPtr &indices, size_t grads_offset, size_t indices_offset) { + inputs_.push_back(weight); + inputs_.push_back(m); + inputs_.push_back(v); + inputs_.push_back(beta1_power); + inputs_.push_back(beta2_power); + 
inputs_.push_back(learning_rate); + inputs_.push_back(beta1); + inputs_.push_back(beta2); + inputs_.push_back(epsilon); + inputs_.push_back(grad); + inputs_.push_back(indices); + grads_offset_ = grads_offset; + indices_offset_ = indices_offset; +} + +void SparseAdamOptimInfo::Update(const Values &values, const Lengths &lens) { + void *data_ptr = values.data(); + AddressPtr beta1_power = inputs_[3]; + size_t size = values.size() * sizeof(float); + auto ret = memcpy_s(beta1_power->addr, size, data_ptr, size); + if (ret != 0) { + MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")"; + } +} + +const AddressPtr &SparseAdamOptimInfo::gradient() { return inputs_[9]; } + +const AddressPtr &SparseAdamOptimInfo::indices() { return inputs_[10]; } + +bool SparseAdamOptimInfo::IsSparse() const { return true; } + +size_t SparseAdamOptimInfo::grad_index() { return 6; } + +size_t SparseAdamOptimInfo::indices_index() { return 7; } + +SparseFtrlOptimInfo::SparseFtrlOptimInfo(const AddressPtr &weight, const AddressPtr &accum, const AddressPtr &linear, + const AddressPtr &grad, const AddressPtr &indices, size_t grads_offset, + size_t indices_offset) { + inputs_.push_back(weight); + inputs_.push_back(accum); + inputs_.push_back(linear); + inputs_.push_back(grad); + inputs_.push_back(indices); + grads_offset_ = grads_offset; + indices_offset_ = indices_offset; +} + +const AddressPtr &SparseFtrlOptimInfo::gradient() { return inputs_[3]; } + +const AddressPtr &SparseFtrlOptimInfo::indices() { return inputs_[4]; } + +bool SparseFtrlOptimInfo::IsSparse() const { return true; } + +size_t SparseFtrlOptimInfo::grad_index() { return 0; } + +size_t SparseFtrlOptimInfo::indices_index() { return 1; } +} // namespace ps +} // namespace parallel +} // namespace mindspore diff --git a/mindspore/ccsrc/frontend/parallel/ps/optimizer_info.h b/mindspore/ccsrc/frontend/parallel/ps/optimizer_info.h new file mode 100644 index 00000000000..bb9a64acdb3 --- /dev/null +++ 
b/mindspore/ccsrc/frontend/parallel/ps/optimizer_info.h @@ -0,0 +1,117 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_OPTIMIZER_INFO_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_OPTIMIZER_INFO_H_ + +#include +#include "backend/kernel_compiler/kernel.h" +#include "frontend/parallel/ps/common.h" + +namespace mindspore { +namespace parallel { +namespace ps { +using mindspore::kernel::AddressPtr; +class OptimizerInfo { + public: + OptimizerInfo() = default; + virtual ~OptimizerInfo() = default; + + virtual void Update(const Values &values, const Lengths &lengths) {} + virtual void UpdateWeight(const WeightPtr &weight); + virtual void Accumulate(const Values &values, const Lengths &lengths) = 0; + virtual void Reset() {} + void AddWorkspace(const AddressPtr &workspace); + + virtual const AddressPtr &gradient() = 0; + virtual const AddressPtr &indices() = 0; + const std::vector &inputs(); + const std::vector &workspaces(); + const std::vector &outputs(); + + virtual bool IsSparse() const; + virtual size_t grad_index(); + virtual size_t indices_index(); + + protected: + std::vector inputs_; + std::vector workspaces_; + std::vector outputs_; +}; + +class DenseOptimInfo : public OptimizerInfo { + public: + DenseOptimInfo() = default; + ~DenseOptimInfo() override = default; + + void Accumulate(const Values &values, const Lengths &lens) override; +}; + 
+class SparseOptimInfo : public OptimizerInfo { + public: + SparseOptimInfo() = default; + ~SparseOptimInfo() override = default; + + void Accumulate(const Values &values, const Lengths &lens) override; + void Reset() override; + + protected: + size_t grads_offset_{0}; + size_t indices_offset_{0}; +}; + +class MomentumOptimInfo : public DenseOptimInfo { + public: + MomentumOptimInfo(const AddressPtr &weight, const AddressPtr &accumulate, const AddressPtr &learning_rate, + const AddressPtr &gradient, const AddressPtr &momentum); + ~MomentumOptimInfo() override = default; + + const AddressPtr &gradient(); + const AddressPtr &indices(); +}; + +class SparseAdamOptimInfo : public SparseOptimInfo { + public: + SparseAdamOptimInfo(const AddressPtr &weight, const AddressPtr &m, const AddressPtr &v, const AddressPtr &beta1_power, + const AddressPtr &beta2_power, const AddressPtr &learning_rate, const AddressPtr &beta1, + const AddressPtr &beta2, const AddressPtr &epsilon, const AddressPtr &grad, + const AddressPtr &indices, size_t grads_offset, size_t indices_offset); + ~SparseAdamOptimInfo() override = default; + + void Update(const Values &values, const Lengths &lens) override; + const AddressPtr &gradient(); + const AddressPtr &indices(); + bool IsSparse() const override; + size_t grad_index() override; + size_t indices_index() override; +}; + +class SparseFtrlOptimInfo : public SparseOptimInfo { + public: + SparseFtrlOptimInfo(const AddressPtr &weight, const AddressPtr &accum, const AddressPtr &linear, + const AddressPtr &grad, const AddressPtr &indices, size_t grads_offset, size_t indices_offset); + ~SparseFtrlOptimInfo() override = default; + + const AddressPtr &gradient(); + const AddressPtr &indices(); + bool IsSparse() const override; + size_t grad_index() override; + size_t indices_index() override; +}; +} // namespace ps +} // namespace parallel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_OPTIMIZER_INFO_H_ diff --git 
a/mindspore/ccsrc/frontend/parallel/ps/optimizer_info_builder.cc b/mindspore/ccsrc/frontend/parallel/ps/optimizer_info_builder.cc new file mode 100644 index 00000000000..159a50793e1 --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ps/optimizer_info_builder.cc @@ -0,0 +1,184 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "frontend/parallel/ps/optimizer_info_builder.h" +#include +#include +#include + +namespace mindspore { +namespace parallel { +namespace ps { +OptimizerInfo *OptimizerInfoBuilder::Build(const std::shared_ptr &pserver_kernel, + const WeightPtr &weight, const Keys &keys, const Values &values, + const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num) { + OptimizerInfo *optim_info = BuildInputs(weight, keys, values, lens, inputs_shape, worker_num); + std::vector ws_sizes = pserver_kernel->workspace_sizes(); + BuildWorkspaces(optim_info, ws_sizes, worker_num); + BuildOutputs(optim_info, worker_num); + return optim_info; +} + +void OptimizerInfoBuilder::BuildWorkspaces(OptimizerInfo *info, const std::vector &ws_sizes, + size_t worker_num) { + for (size_t i = 0; i < ws_sizes.size(); i++) { + size_t size = ws_sizes[i]; + AddressPtr workspace = std::make_shared(); + workspace->addr = new float[size]; + workspace->size = size; + info->AddWorkspace(workspace); + } +} + +OptimizerInfo *MomentumOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const 
Keys &keys, const Values &values, + const Lengths &lens, const InputsShapePtr &inputs_shape, + size_t worker_num) { + AddressPtr weight_addr = std::make_shared(); + weight_addr->addr = weight->data(); + weight_addr->size = weight->size(); + void *data_ptr = values.data(); + AddressPtr accumulate = std::make_shared(); + accumulate->addr = new float[weight->size()]; + accumulate->size = weight->size(); + AddressPtr learning_rate = std::make_shared(); + learning_rate->addr = data_ptr; + learning_rate->size = lens[0]; + AddressPtr gradient = std::make_shared(); + gradient->addr = reinterpret_cast(learning_rate->addr) + lens[0]; + gradient->size = lens[1]; + AddressPtr momentum = std::make_shared(); + momentum->addr = reinterpret_cast(gradient->addr) + lens[1]; + momentum->size = lens[2]; + + return new MomentumOptimInfo(weight_addr, accumulate, learning_rate, gradient, momentum); +} + +OptimizerInfo *SparseAdamOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, + const Lengths &lens, const InputsShapePtr &inputs_shape, + size_t worker_num) { + AddressPtr weight_addr = std::make_shared(); + weight_addr->addr = weight->data(); + weight_addr->size = weight->size(); + AddressPtr m = std::make_shared(); + m->addr = new float[weight->size()]; + m->size = weight->size() * sizeof(float); + AddressPtr v = std::make_shared(); + v->addr = new float[weight->size()]; + v->size = weight->size() * sizeof(float); + + void *data_ptr = values.data(); + void *copy_data_ptr = new float[values.size()]; + auto ret = memcpy_s(copy_data_ptr, values.size() * sizeof(float), data_ptr, values.size() * sizeof(float)); + if (ret != 0) { + MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")"; + } + + AddressPtr beta1_power = std::make_shared(); + beta1_power->addr = copy_data_ptr; + beta1_power->size = lens[0] * sizeof(float); + AddressPtr beta2_power = std::make_shared(); + beta2_power->addr = reinterpret_cast(beta1_power->addr) + lens[0]; + 
beta2_power->size = lens[1] * sizeof(float); + + AddressPtr learning_rate = std::make_shared(); + learning_rate->addr = reinterpret_cast(beta2_power->addr) + lens[1]; + learning_rate->size = lens[2] * sizeof(float); + + AddressPtr beta1 = std::make_shared(); + beta1->addr = reinterpret_cast(learning_rate->addr) + lens[2]; + beta1->size = lens[3] * sizeof(float); + + AddressPtr beta2 = std::make_shared(); + beta2->addr = reinterpret_cast(beta1->addr) + lens[3]; + beta2->size = lens[4] * sizeof(float); + + AddressPtr epsilon = std::make_shared(); + epsilon->addr = reinterpret_cast(beta2->addr) + lens[4]; + epsilon->size = lens[5] * sizeof(float); + + const std::shared_ptr> &grad_shape = (*inputs_shape)[9]; + size_t total_grad_size = + std::accumulate((*grad_shape).begin(), (*grad_shape).end(), sizeof(float), std::multiplies()); + AddressPtr grad = std::make_shared(); + grad->addr = new float[total_grad_size * worker_num]; + auto ret2 = memcpy_s(grad->addr, lens[6] * sizeof(float), reinterpret_cast(epsilon->addr) + lens[5], + lens[6] * sizeof(float)); + if (ret2 != 0) { + MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret2 << ")"; + } + grad->size = lens[6] * sizeof(float); + + const std::shared_ptr> &indices_shape = (*inputs_shape)[10]; + size_t total_indice_size = + std::accumulate((*indices_shape).begin(), (*indices_shape).end(), sizeof(float), std::multiplies()); + AddressPtr indices = std::make_shared(); + indices->addr = new float[total_indice_size * worker_num]; + auto ret3 = memcpy_s(indices->addr, lens[7] * sizeof(float), + reinterpret_cast(epsilon->addr) + lens[5] + lens[6], lens[7] * sizeof(float)); + if (ret3 != 0) { + MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret3 << ")"; + } + indices->size = lens[7] * sizeof(float); + + return new SparseAdamOptimInfo(weight_addr, m, v, beta1_power, beta2_power, learning_rate, beta1, beta2, epsilon, + grad, indices, total_grad_size, total_indice_size); +} + +OptimizerInfo 
*SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, + const Lengths &lens, const InputsShapePtr &inputs_shape, + size_t worker_num) { + AddressPtr weight_addr = std::make_shared(); + weight_addr->addr = weight->data(); + weight_addr->size = weight->size(); + AddressPtr accum = std::make_shared(); + accum->addr = new float[weight->size()]; + accum->size = weight->size() * sizeof(float); + for (size_t i = 0; i < weight->size(); i++) { + float *tmp = reinterpret_cast(accum->addr); + tmp[i] = 1.0; + } + AddressPtr linear = std::make_shared(); + linear->addr = new float[weight->size()]; + memcpy_s(linear->addr, weight->size() * sizeof(float), 0x00, weight->size() * sizeof(float)); + linear->size = weight->size() * sizeof(float); + + const std::shared_ptr> &grad_shape = (*inputs_shape)[3]; + size_t total_grad_size = std::accumulate((*grad_shape).begin(), (*grad_shape).end(), 1, std::multiplies()); + AddressPtr grad = std::make_shared(); + grad->addr = new float[total_grad_size * worker_num]; + auto ret = memcpy_s(grad->addr, lens[0] * sizeof(float), values.data(), lens[0] * sizeof(float)); + if (ret != 0) { + MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")"; + } + grad->size = lens[0] * sizeof(float); + + const std::shared_ptr> &indices_shape = (*inputs_shape)[4]; + size_t total_indice_size = + std::accumulate((*indices_shape).begin(), (*indices_shape).end(), 1, std::multiplies()); + AddressPtr indices = std::make_shared(); + indices->addr = new float[total_indice_size * worker_num]; + auto ret2 = memcpy_s(indices->addr, lens[1] * sizeof(float), reinterpret_cast(values.data()) + lens[0], + lens[1] * sizeof(float)); + if (ret2 != 0) { + MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret2 << ")"; + } + indices->size = lens[1] * sizeof(float); + + return new SparseFtrlOptimInfo(weight_addr, accum, linear, grad, indices, total_grad_size, total_indice_size); +} +} // namespace ps +} // namespace parallel 
+} // namespace mindspore diff --git a/mindspore/ccsrc/frontend/parallel/ps/optimizer_info_builder.h b/mindspore/ccsrc/frontend/parallel/ps/optimizer_info_builder.h new file mode 100644 index 00000000000..c5aae32921b --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ps/optimizer_info_builder.h @@ -0,0 +1,66 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_OPTIMIZER_INFO_BUILDER_H_ + +#include +#include +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/ps/pserver_kernel.h" +#include "frontend/parallel/ps/optimizer_info.h" + +namespace mindspore { +namespace parallel { +namespace ps { +using mindspore::kernel::KernelMod; +using mindspore::kernel::ps::PServerKernel; +class OptimizerInfoBuilder { + public: + OptimizerInfoBuilder() = default; + virtual ~OptimizerInfoBuilder() = default; + + OptimizerInfo *Build(const std::shared_ptr &pserver_kernel, const WeightPtr &weight, const Keys &keys, + const Values &values, const Lengths &lens, const InputsShapePtr &inputs_shape, + size_t worker_num); + + virtual OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, + const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num) = 0; + + virtual void BuildWorkspaces(OptimizerInfo *info, const std::vector &ws_sizes, size_t worker_num); + virtual void BuildOutputs(OptimizerInfo 
*info, size_t worker_num) {} +}; + +class MomentumOptimInfoBuilder : public OptimizerInfoBuilder { + public: + OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens, + const InputsShapePtr &inputs_shape, size_t worker_num) override; +}; + +class SparseAdamOptimInfoBuilder : public OptimizerInfoBuilder { + public: + OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens, + const InputsShapePtr &inputs_shpae, size_t worker_num) override; +}; + +class SparseFtrlOptimInfoBuilder : public OptimizerInfoBuilder { + public: + OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens, + const InputsShapePtr &inputs_shpae, size_t worker_num) override; +}; +} // namespace ps +} // namespace parallel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_OPTIMIZER_INFO_BUILDER_H_ diff --git a/mindspore/ccsrc/frontend/parallel/ps/parameter_server.h b/mindspore/ccsrc/frontend/parallel/ps/parameter_server.h new file mode 100755 index 00000000000..1afb4c9fa65 --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ps/parameter_server.h @@ -0,0 +1,559 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_PARAMETER_SERVER_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_PARAMETER_SERVER_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ir/func_graph.h" +#include "backend/session/session_basic.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/session/session_factory.h" +#include "frontend/parallel/ps/common.h" +#include "frontend/parallel/ps/optimizer_info.h" +#include "frontend/parallel/ps/optimizer_info_builder.h" +#include "frontend/parallel/ps/util.h" +#include "runtime/device/cpu/kernel_select_cpu.h" +#include "utils/context/ms_context.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/ps/pserver_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/ps/sparse_apply_adam_ps_kernel.h" +#include "backend/kernel_compiler/ps/sparse_apply_ftrl_ps_kernel.h" +#include "backend/kernel_compiler/ps/apply_momentum_ps_kernel.h" +#include "backend/kernel_compiler/ps/embedding_look_up_ps_kernel.h" + +namespace mindspore { +namespace parallel { +namespace ps { +using mindspore::kernel::ps::PServerKernel; +template +class ParameterServer { + public: + static ParameterServer &GetInstance() { + static ParameterServer instance; + return instance; + } + + void Run(const FuncGraphPtr &func_graph); + + private: + ParameterServer() + : pserver_num_(0), + worker_num_(0), + rank_id_(0), + grad_accum_count_(0), + ps_(new ::ps::KVServer(0)), + handler_(nullptr), + func_graph_(nullptr), + kernel_graph_(nullptr), + sess_(nullptr), + thread_(nullptr) {} + ~ParameterServer() = default; + ParameterServer(const ParameterServer &) = delete; + ParameterServer &operator=(const ParameterServer &) = delete; + + struct ServerHandler { + explicit ServerHandler(ParameterServer *ps) : ps_(ps) {} + void operator()(const 
::ps::KVMeta &req_meta, const ::ps::KVPairs &req_data, ::ps::KVServer *server); + void HandlePushReq(const ::ps::KVMeta &req_meta, const ::ps::KVPairs &req_data); + void HandlePullReq(const ::ps::KVMeta &req_meta, const ::ps::KVPairs &req_data, ::ps::KVPairs *res); + void HandleInitWeights(const ::ps::KVPairs &req_data); + void HandleInitWeightToOptimId(const ::ps::KVPairs &req_data); + void HandleInitInputsShape(const ::ps::KVPairs &req_data); + void HandleInitEmbeddings(const ::ps::KVPairs &req_data); + void HandleEmbeddingLookup(const ::ps::KVMeta &req_meta, const ::ps::KVPairs &req_data, ::ps::KVPairs *res); + ParameterServer *ps_; + }; + + bool Init(const FuncGraphPtr &func_graph); + void InitOptimInfoBuilders(); + void InitWeightKeyToOptims(const Key &key, const int &optim_id); + void InitOptimInputsShape(const Keys &keys, const Values &values, const Lengths &lengths); + void InitWeight(const Key &key, const WeightPtr &weight); + void InitGrad(const Key &key, const GradPtr &grad); + void InitEmbeddingTable(const Key &key, + const std::shared_ptr>>> &shapes); + void UpdateWeights(); + void AccumGrad(const Keys &key, const Values &values, const Lengths &lengths); + WeightPtr weight(const Key &key); + void DoEmbeddingLookup(Key key, const LookupIds &lookup_ids, ::ps::KVPairs *res); + int SumOfShapes(const std::vector &shapes) const; + size_t PreComputeCapacity(const Keys &keys, const Lengths &lens); + bool ReadyForUpdateWeights(); + bool ReadyForAccumGrads(); + void ResetGradAccumCount(); + + size_t pserver_num_; + size_t worker_num_; + size_t rank_id_; + size_t grad_accum_count_; + std::unique_ptr<::ps::KVServer> ps_; + std::unique_ptr handler_; + FuncGraphPtr func_graph_; + std::shared_ptr kernel_graph_; + std::shared_ptr sess_; + + std::unordered_map> optimizers_; + std::unordered_map optim_inputs_shape_; + std::unordered_map> optim_infos_; + std::unordered_map> optim_info_builders_; + std::unordered_map weight_key_to_optims_; + std::unordered_map weights_; + 
std::unordered_map grads_; + std::unordered_map grads_accum_counter_; + // std::unordered_map embeddings_; + std::unordered_map> embedding_lookup_ops_; + std::unordered_map embedding_row_lens_; + + T learning_rate_; + T momentum_; + + std::mutex mutex_; + std::condition_variable apply_grads_cv_; + std::condition_variable accum_grads_cv_; + + std::unique_ptr thread_; + + friend struct ServerHandler; +}; + +class FuncGraph; +template +void ParameterServer::ServerHandler::operator()(const ::ps::KVMeta &req_meta, const ::ps::KVPairs &req_data, + ::ps::KVServer *server) { + ::ps::KVPairs res; + if (req_meta.cmd == kInitWeightsCmd) { + MS_LOG(ERROR) << "handle init weights cmd" << std::endl; + HandleInitWeights(req_data); + } else if (req_meta.cmd == kInitWeightToOptimIdCmd) { + MS_LOG(ERROR) << "handle init weight optim id mapping cmd" << std::endl; + HandleInitWeightToOptimId(req_data); + } else if (req_meta.cmd == kInitOptimInputsShapeCmd) { + MS_LOG(ERROR) << "handle init inputs shape cmd" << std::endl; + HandleInitInputsShape(req_data); + } else if (req_meta.cmd == kInitEmbeddingsCmd) { + MS_LOG(ERROR) << "handle init embedding cmd" << std::endl; + HandleInitEmbeddings(req_data); + } else if (req_meta.cmd == kEmbeddingLookupCmd) { + MS_LOG(ERROR) << "handle embedding lookup cmd" << std::endl; + HandleEmbeddingLookup(req_meta, req_data, &res); + } else if (req_meta.push) { + MS_LOG(ERROR) << "handle push req cmd" << std::endl; + HandlePushReq(req_meta, req_data); + } else { + MS_LOG(ERROR) << "handle pull req cmd" << std::endl; + HandlePullReq(req_meta, req_data, &res); + } + server->Response(req_meta, res); +} + +template +void ParameterServer::ServerHandler::HandlePushReq(const ::ps::KVMeta &req_meta, const ::ps::KVPairs &req_data) { + ps_->AccumGrad(req_data.keys, req_data.vals, req_data.lens); +} + +template +void ParameterServer::ServerHandler::HandlePullReq(const ::ps::KVMeta &req_meta, const ::ps::KVPairs &req_data, + ::ps::KVPairs *res) { + res->keys = 
req_data.keys; + ::ps::Key key = req_data.keys[0]; + res->vals = *(ps_->weight(key)); +} + +template +void ParameterServer::ServerHandler::HandleInitWeights(const ::ps::KVPairs &req_data) { + size_t key_num = req_data.keys.size(); + T *data_ptr = req_data.vals.data(); + size_t pos = 0; + for (size_t i = 0; i < key_num; i++) { + Key key = req_data.keys[i]; + size_t data_len = req_data.lens.size() != key_num ? req_data.vals.size() / key_num : req_data.lens[i]; + + WeightPtr weight_ptr = std::make_shared<::ps::SArray>(); + weight_ptr->CopyFrom(data_ptr + pos, data_len); + ps_->InitWeight(key, weight_ptr); + + GradPtr grad_ptr = std::make_shared<::ps::SArray>(data_len, 0); + ps_->InitGrad(key, grad_ptr); + pos += data_len; + } +} + +template +void ParameterServer::ServerHandler::HandleInitWeightToOptimId(const ::ps::KVPairs &req_data) { + size_t key_num = req_data.keys.size(); + for (size_t i = 0; i < key_num; i++) { + Key key = req_data.keys[i]; + T val = req_data.vals[i]; + ps_->InitWeightKeyToOptims(key, val); + } +} + +template +void ParameterServer::ServerHandler::HandleInitInputsShape(const ::ps::KVPairs &req_data) { + ps_->InitOptimInputsShape(req_data.keys, req_data.vals, req_data.lens); +} + +template +void ParameterServer::ServerHandler::HandleInitEmbeddings(const ::ps::KVPairs &req_data) { + std::shared_ptr>>> shapes = + std::make_shared>>>(); + std::shared_ptr> input_shape = std::make_shared>(); + std::shared_ptr> indices_shape = std::make_shared>(); + std::shared_ptr> output_shape = std::make_shared>(); + shapes->push_back(input_shape); + shapes->push_back(indices_shape); + shapes->push_back(output_shape); + + const Key &key = req_data.keys[0]; + const Lengths &lens = req_data.lens; + size_t index = 0; + for (int i = 0; i < lens[0]; i++) { + input_shape->push_back(static_cast(req_data.vals[index++])); + } + for (int j = 0; j < lens[1]; j++) { + indices_shape->push_back(static_cast(req_data.vals[index++])); + } + for (int k = 0; k < lens[2]; k++) { + 
output_shape->push_back(static_cast(req_data.vals[index++])); + } + ps_->InitEmbeddingTable(key, shapes); +} + +template +void ParameterServer::ServerHandler::HandleEmbeddingLookup(const ::ps::KVMeta &req_meta, + const ::ps::KVPairs &req_data, ::ps::KVPairs *res) { + const Key &key = req_data.keys[0]; + ps_->DoEmbeddingLookup(key, req_data.vals, res); + for (size_t i = 0; i < req_data.vals.size(); i++) { + res->keys->push_back(req_data.vals[i]); + } +} + +template +bool ParameterServer::Init(const FuncGraphPtr &func_graph) { + const char *server_num = getenv(kEnvPServerNum); + const char *worker_num = getenv(kEnvWorkerNum); + if (server_num != nullptr) { + pserver_num_ = *server_num - '0'; + } + if (worker_num != nullptr) { + worker_num_ = *worker_num - '0'; + } + func_graph_ = func_graph; + rank_id_ = ::ps::MyRank(); + handler_.reset(new ServerHandler(this)); + + InitOptimInfoBuilders(); + + ps_->set_request_handle(*handler_); + thread_.reset(new std::thread(&ParameterServer::UpdateWeights, this)); + return true; +} + +template +void ParameterServer::InitOptimInfoBuilders() { + std::shared_ptr momentum_info_builder = std::make_shared(); + std::shared_ptr sparse_adam_info_builder = std::make_shared(); + std::shared_ptr sparse_ftrl_info_builder = std::make_shared(); + optim_info_builders_[kApplyMomentum] = momentum_info_builder; + optim_info_builders_[kSparseAdam] = sparse_adam_info_builder; + optim_info_builders_[kSparseFtrl] = sparse_ftrl_info_builder; +} + +template +void ParameterServer::InitWeightKeyToOptims(const Key &key, const int &optim_id) { + if (weight_key_to_optims_.count(key) > 0 || Util::optimizer_name(key) == "") { + return; + } + weight_key_to_optims_[key] = Util::optimizer_name(optim_id); +} + +template +void ParameterServer::InitOptimInputsShape(const Keys &keys, const Values &values, const Lengths &lengths) { + InputsShapePtr inputs_shape = std::make_shared(); + int val_idx = 0; + const Key &key = keys[0]; + + if (optim_inputs_shape_.count(key) 
== 0) { + optim_inputs_shape_[key] = inputs_shape; + } + for (size_t i = 0; i < keys.size(); i++) { + auto shape = std::make_shared>(); + inputs_shape->push_back(shape); + + int len = lengths[i]; + for (int j = 0; j < len; j++) { + shape->push_back(values[val_idx++]); + } + } + if (weight_key_to_optims_.count(key) > 0) { + const std::string &optim_name = weight_key_to_optims_[key]; + if (optimizers_.count(optim_name) == 0 && optim_inputs_shape_.count(key) > 0) { + if (optim_name == kSparseAdam) { + std::shared_ptr optimizer = + std::make_shared(rank_id_, pserver_num_); + optimizer->InitKernel(optim_inputs_shape_[key]); + optimizers_[optim_name] = optimizer; + } else if (optim_name == kApplyMomentum) { + std::shared_ptr optimizer = + std::make_shared(rank_id_, pserver_num_); + optimizer->InitKernel(optim_inputs_shape_[key]); + optimizers_[optim_name] = optimizer; + } else if (optim_name == kSparseFtrl) { + std::shared_ptr optimizer = + std::make_shared(rank_id_, pserver_num_); + optimizer->InitKernel(optim_inputs_shape_[key]); + optimizers_[optim_name] = optimizer; + } + } + } +} + +template +void ParameterServer::InitWeight(const Key &key, const WeightPtr &weight) { + if (weights_.count(key) == 0) { + weights_[key] = weight; + } +} + +template +void ParameterServer::InitGrad(const Key &key, const GradPtr &grad) { + if (grads_.count(key) == 0) { + grads_[key] = grad; + grads_accum_counter_[key] = 0; + } +} + +template +void ParameterServer::InitEmbeddingTable( + const Key &key, const std::shared_ptr>>> &shapes) { + // Init embedding lookup kernel + std::shared_ptr lookup = std::make_shared(rank_id_, pserver_num_); + lookup->InitKernel(shapes); + embedding_lookup_ops_[key] = lookup; + + // Init embedding weight + const std::vector &input_shapes = lookup->input_sizes(); + size_t total_dims = 1; + for (auto shape : input_shapes) { + total_dims *= shape; + } + WeightPtr embedding = std::make_shared(total_dims, 0.01); + weights_[key] = embedding; + + 
grads_accum_counter_[key] = 0; +} + +template +void ParameterServer::UpdateWeights() { + while (true) { + std::unique_lock lock(mutex_); + apply_grads_cv_.wait(lock, [this] { return this->ReadyForUpdateWeights(); }); + + for (auto iter = weights_.begin(); iter != weights_.end(); iter++) { + Key key = iter->first; + WeightPtr weight_ptr = iter->second; + + std::shared_ptr optimizer = nullptr; + if (weight_key_to_optims_.count(key) > 0) { + const std::string &optim_name = weight_key_to_optims_[key]; + optimizer = optimizers_[optim_name]; + } + MS_EXCEPTION_IF_NULL(optimizer); + + std::shared_ptr optim_info = optim_infos_[key]; + if (optim_info == nullptr) { + continue; + } + const WeightPtr &weight = weights_[key]; + optim_info->UpdateWeight(weight); + const std::vector &inputs = optim_info->inputs(); + const std::vector &workspaces = optim_info->workspaces(); + const std::vector &outputs = optim_info->outputs(); + + optimizer->Execute(inputs, workspaces, outputs); + optim_info->Reset(); + } + ResetGradAccumCount(); + accum_grads_cv_.notify_all(); + } +} + +template +void ParameterServer::AccumGrad(const Keys &keys, const Values &values, const Lengths &lengths) { + std::unique_lock lock(mutex_); + accum_grads_cv_.wait(lock, [this] { return this->ReadyForAccumGrads(); }); + + const Key &key = keys[0]; + std::shared_ptr optim_info = optim_infos_[key]; + + // Create or update the optimizer info + if (optim_info == nullptr) { + const std::shared_ptr &builder = optim_info_builders_[weight_key_to_optims_[key]]; + std::shared_ptr pserver_kernel = optimizers_[weight_key_to_optims_[key]]; + if (pserver_kernel == nullptr) { + MS_LOG(EXCEPTION) << "no optimizer found for key " << key << " optim name " << weight_key_to_optims_[key]; + } + MS_EXCEPTION_IF_NULL(pserver_kernel); + OptimizerInfo *optim = + builder->Build(pserver_kernel, weights_[key], keys, values, lengths, optim_inputs_shape_[key], worker_num_); + optim_info.reset(optim); + optim_infos_[key] = optim_info; + } else 
{ + optim_info->Update(values, lengths); + } + MS_EXCEPTION_IF_NULL(optim_info); + + optim_info->Accumulate(values, lengths); + + grads_accum_counter_[key] += 1; + if (grads_accum_counter_[key] == worker_num_) { + grad_accum_count_++; + } + if (ReadyForUpdateWeights()) { + apply_grads_cv_.notify_one(); + } +} + +template +WeightPtr ParameterServer::weight(const Key &key) { + std::unique_lock lock(mutex_); + + if (weights_.count(key) == 0) { + MS_LOG(ERROR) << "Invalid weight key " << key; + return nullptr; + } + WeightPtr weight_ptr = weights_[key]; + WeightPtr copy_weight_ptr = std::make_shared<::ps::SArray>(weight_ptr->size(), 0); + copy_weight_ptr->CopyFrom(weight_ptr->data(), weight_ptr->size()); + return copy_weight_ptr; +} + +template +void ParameterServer::DoEmbeddingLookup(Key key, const LookupIds &lookup_ids, ::ps::KVPairs *res) { + std::unique_lock lock(mutex_); + if (weights_.count(key) == 0) { + MS_LOG(ERROR) << "Invalid embedding table key " << key; + return; + } + if (embedding_lookup_ops_.count(key) == 0) { + MS_LOG(ERROR) << "Invalid embedding lookup op key " << key; + return; + } + WeightPtr table_ptr = weights_[key]; + std::shared_ptr table_lookup_op = embedding_lookup_ops_[key]; + + // Update shapes of lookup operator + std::shared_ptr>>> shapes = + std::make_shared>>>(); + std::shared_ptr> indices_shape = std::make_shared>(); + indices_shape->emplace_back(lookup_ids.size()); + shapes->push_back(indices_shape); + table_lookup_op->ReInit(shapes); + + const std::vector output_shapes = table_lookup_op->output_sizes(); + std::vector inputs; + AddressPtr embedding_table = std::make_shared(); + AddressPtr indices = std::make_shared(); + inputs.push_back(embedding_table); + inputs.push_back(indices); + embedding_table->addr = table_ptr->data(); + embedding_table->size = table_ptr->size() * sizeof(T); + indices->addr = lookup_ids.data(); + indices->size = lookup_ids.size() * sizeof(T); + + std::vector workspaces; + std::vector outputs; + AddressPtr 
output = std::make_shared(); + std::shared_ptr addr = std::make_shared(output_shapes[0] / sizeof(T), 0); + + output->addr = addr->data(); + output->size = output_shapes[0]; + outputs.push_back(output); + + table_lookup_op->Execute(inputs, workspaces, outputs); + res->vals = *addr; + res->lens.push_back(res.vals.size()); +} + +template +int ParameterServer::SumOfShapes(const std::vector &shapes) const { + int sum = 1; + for (auto shape : shapes) { + sum *= shape; + } + return sum; +} + +template +size_t ParameterServer::PreComputeCapacity(const Keys &keys, const Lengths &lens) { + size_t capacity = 0; + for (size_t i = 0; i < keys.size(); i++) { + Key key = keys[i]; + if (embedding_row_lens_.count(key) > 0) { + capacity += embedding_row_lens_[key] * lens[i]; + } else { + MS_LOG(ERROR) << "Invalid embedding lookup id " << key; + } + } + return capacity; +} + +template +inline bool ParameterServer::ReadyForUpdateWeights() { + return grads_accum_counter_.size() > 0 && grad_accum_count_ == grads_accum_counter_.size(); +} + +template +inline bool ParameterServer::ReadyForAccumGrads() { + return grad_accum_count_ < weights_.size(); +} + +template +inline void ParameterServer::ResetGradAccumCount() { + grad_accum_count_ = 0; + for (auto iter = grads_accum_counter_.begin(); iter != grads_accum_counter_.end(); iter++) { + grads_accum_counter_[iter->first] = 0; + } +} + +template +void ParameterServer::Run(const FuncGraphPtr &func_graph) { + ::ps::Start(0); + if (!::ps::IsServer()) { + std::cout << "This is not ther Server" << std::endl; + return; + } + Init(func_graph); + thread_->join(); +} +} // namespace ps +} // namespace parallel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_PARAMETER_SERVER_H_ diff --git a/mindspore/ccsrc/ir/primitive_base_extends.cc b/mindspore/ccsrc/frontend/parallel/ps/scheduler.cc old mode 100644 new mode 100755 similarity index 68% rename from mindspore/ccsrc/ir/primitive_base_extends.cc rename to 
mindspore/ccsrc/frontend/parallel/ps/scheduler.cc index 64bdafa4d17..274b7259b09 --- a/mindspore/ccsrc/ir/primitive_base_extends.cc +++ b/mindspore/ccsrc/frontend/parallel/ps/scheduler.cc @@ -1,25 +1,32 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/primitive_base.h" -#include "pipeline/static_analysis/abstract_function.h" - -namespace mindspore { -abstract::AbstractBasePtr Primitive::ToPrimAbstract(const AnfNodePtr &anf_node) { - auto prim_func = std::make_shared(shared_from_base(), anf_node); - return prim_func; -} -} // namespace mindspore +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "frontend/parallel/ps/scheduler.h" +#include +#include "ps/ps.h" + +namespace mindspore { +namespace parallel { +namespace ps { +void Scheduler::Run() { + ::ps::Start(0); + while (true) { + sleep(1); + } +} +} // namespace ps +} // namespace parallel +} // namespace mindspore diff --git a/mindspore/ccsrc/frontend/parallel/ps/scheduler.h b/mindspore/ccsrc/frontend/parallel/ps/scheduler.h new file mode 100755 index 00000000000..e656bcfd22c --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ps/scheduler.h @@ -0,0 +1,40 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_SCHEDULER_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_SCHEDULER_H_ +namespace mindspore { +namespace parallel { +namespace ps { +class Scheduler { + public: + static Scheduler &GetInstance() { + static Scheduler instance; + return instance; + } + + void Run(); + + private: + Scheduler() = default; + ~Scheduler() = default; + Scheduler(const Scheduler &) = delete; + Scheduler &operator=(const Scheduler &) = delete; +}; +} // namespace ps +} // namespace parallel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_SCHEDULER_H_ diff --git a/mindspore/ccsrc/frontend/parallel/ps/util.cc b/mindspore/ccsrc/frontend/parallel/ps/util.cc new file mode 100644 index 00000000000..fc63e88901d --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ps/util.cc @@ -0,0 +1,128 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "frontend/parallel/ps/util.h" +#include +#include "frontend/parallel/ps/common.h" +#include "common/utils.h" + +namespace mindspore { +namespace parallel { +namespace ps { +std::unordered_map Util::optimizer_to_ids{ + {kApplyMomentum, 0}, + {kSparseAdam, 1}, + {kSparseFtrl, 2}, +}; + +std::unordered_map Util::id_to_optimizers{ + {0, kApplyMomentum}, + {1, kSparseAdam}, + {2, kSparseFtrl}, +}; +bool Util::IsParamServerMode() { return IsRoleOfWorker() || IsRoleOfPServer() || IsRoleOfScheduler(); } + +bool Util::IsRoleOfWorker() { + auto role = common::GetEnv(kEnvRole); + if (strcmp(role.c_str(), kEnvRoleOfWorker) == 0) { + return true; + } else { + return false; + } +} + +bool Util::IsRoleOfPServer() { + auto role = common::GetEnv(kEnvRole); + if (strcmp(role.c_str(), kEnvRoleOfPServer) == 0) { + return true; + } else { + return false; + } +} + +bool Util::IsRoleOfScheduler() { + auto role = common::GetEnv(kEnvRole); + if (strcmp(role.c_str(), kEnvRoleOfScheduler) == 0) { + return true; + } else { + return false; + } +} + +void Util::SetInternalEnvVar() { + if (IsParamServerMode()) { + auto comm_type = common::GetEnv(kEnvCommType); + if (comm_type.size() > 0) { + (void)common::SetEnv(kDmlcCommType, comm_type.c_str()); + } + auto interface = common::GetEnv(kEnvInterface); + if (interface.size() > 0) { + (void)common::SetEnv(kDmlcInterface, interface.c_str()); + } + auto server_num = common::GetEnv(kEnvPServerNum); + if (server_num.size() > 0) { + (void)common::SetEnv(kDmlcPServerNum, server_num.c_str()); + } + auto worker_num = common::GetEnv(kEnvWorkerNum); + if (worker_num.size() > 0) { + (void)common::SetEnv(kDmlcWorkerNum, worker_num.c_str()); + } + if (IsRoleOfScheduler()) { + (void)common::SetEnv(kDmlcRole, kRoleOfScheduler); + } else if (IsRoleOfPServer()) { + (void)common::SetEnv(kDmlcRole, kRoleOfPServer); + } else if (IsRoleOfWorker()) { + (void)common::SetEnv(kDmlcRole, kRoleOfWorker); + } + auto scheduler_host = 
common::GetEnv(kEnvSchedulerHost); + if (scheduler_host.size() > 0) { + (void)common::SetEnv(kDmlcSchedulerHost, scheduler_host.c_str()); + } + auto scheduler_port = common::GetEnv(kEnvSchedulerPort); + if (scheduler_port.size() > 0) { + (void)common::SetEnv(kDmlcSchedulerPort, scheduler_port.c_str()); + } + } +} + +int Util::optimizer_id(std::string name) { + if (optimizer_to_ids.count(name) > 0) { + return optimizer_to_ids[name]; + } + return -1; +} + +std::string Util::optimizer_name(int id) { + if (id_to_optimizers.count(id) > 0) { + return id_to_optimizers[id]; + } + return ""; +} + +bool Util::is_optimizer(std::string name) { return optimizer_to_ids.count(name) > 0; } + +int Util::LocalShard(int first_dim, int rank_id, int server_num) { + int shard_size = std::round((static_cast(first_dim)) / server_num); + int remain_size = first_dim % server_num; + if (remain_size == 0 || rank_id < server_num - 1) { + return shard_size; + } else { + return first_dim - (shard_size * (server_num - 1)); + } +} +} // namespace ps +} // namespace parallel +} // namespace mindspore diff --git a/mindspore/ccsrc/frontend/parallel/ps/util.h b/mindspore/ccsrc/frontend/parallel/ps/util.h new file mode 100644 index 00000000000..8947ad36dea --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ps/util.h @@ -0,0 +1,47 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_UTIL_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_UTIL_H_ + +#include +#include +#include +#include "backend/session/anf_runtime_algorithm.h" + +namespace mindspore { +namespace parallel { +namespace ps { +class Util { + public: + static bool IsParamServerMode(); + static bool IsRoleOfWorker(); + static bool IsRoleOfPServer(); + static bool IsRoleOfScheduler(); + static void SetInternalEnvVar(); + static int optimizer_id(std::string name); + static std::string optimizer_name(int id); + static bool is_optimizer(std::string name); + static int LocalShard(int first_dim, int rank_id, int server_num); + + private: + static std::unordered_map optimizer_to_ids; + static std::unordered_map id_to_optimizers; +}; +} // namespace ps +} // namespace parallel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_UTIL_H_ diff --git a/mindspore/ccsrc/frontend/parallel/ps/worker.h b/mindspore/ccsrc/frontend/parallel/ps/worker.h new file mode 100644 index 00000000000..9ecbc28fc51 --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ps/worker.h @@ -0,0 +1,259 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_WORKER_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_WORKER_H_ + +#include +#include +#include +#include +#include +#include "ps/ps.h" +#include "utils/log_adapter.h" +#include "frontend/parallel/ps/util.h" +#include "frontend/parallel/ps/common.h" +#include "frontend/parallel/ps/worker_proxy.h" + +namespace mindspore { +namespace parallel { +namespace ps { +template +class Worker { + public: + static Worker &GetInstance() { + static Worker instance; + return instance; + } + + void Run(); + void Push(const std::vector &keys, std::vector addrs, const std::vector &sizes); + void Pull(const size_t key, void *dev_addr, const size_t size); + size_t SetParamKey(const std::string ¶m_name); + void SetKeyOptimId(size_t key, const std::string &optimizer_name); + void SetOptimInputShapes(size_t key, const std::vector &shape); + void AddEmbeddingTable(const ::ps::Key &key, const size_t &row_count); + void InitPSEmbeddingTable(const std::vector &keys, std::vector shapes, const std::vector &sizes); + void InitPSParamAndOptim(const std::string ¶m_name, void *param_data, size_t param_size); + void DoPSEmbeddingLookup(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray &lookup_ids, + const ::ps::SArray &lens, ::ps::SArray *lookup_result, int cmd); + + private: + Worker() : kv_worker_(nullptr), running_(false), key_cnt_(0) {} + ~Worker() { ::ps::Finalize(0, true); } + Worker(const Worker &) = delete; + Worker &operator=(const Worker &) = delete; + + bool IsKeyInit(const size_t key); + size_t GetParamKey(const std::string ¶m_name); + void InitPSOptimId(const size_t param_key); + void InitPSOptimInputShapes(const size_t key); + void InitPSParamData(const std::vector &keys, void *origin_addr, size_t size); + static void EmbeddingLookupIdSlicer(const ::ps::KVPairs &send, const std::vector<::ps::Range> &ranges, + std::vector>> *sliced) {} + + std::shared_ptr> kv_worker_; + bool running_; + size_t key_cnt_; + std::map 
param_to_key_; + std::map init_keys_; + std::map key_to_optimId_; + std::map>> key_to_optim_shapes_; +}; + +template +void Worker::Run() { + if (running_) { + MS_LOG(INFO) << "'Worker is already running."; + return; + } + + ::ps::Start(0); + if (!::ps::IsWorker()) { + MS_LOG(EXCEPTION) << "The role is not worker."; + } + kv_worker_ = std::make_shared>(0, 0, 1); + running_ = true; +} + +template +void Worker::Push(const std::vector &keys, std::vector addrs, const std::vector &sizes) { + size_t total_size = 0; + for (auto size : sizes) { + total_size += size; + } + ::ps::SArray total_buffer(total_size, 0); + size_t offset = 0; + for (size_t i = 0; i < sizes.size(); i++) { + memcpy(total_buffer.data() + offset / sizeof(T), addrs[i], sizes[i] * sizeof(T)); + offset += sizes[i] * sizeof(T); + } + kv_worker_->PushData(::ps::SArray<::ps::Key>(keys), total_buffer, ::ps::SArray(sizes)); +} + +template +void Worker::Pull(const size_t key, void *dev_addr, const size_t size) { + ::ps::SArray variables(size / sizeof(T), 0); + kv_worker_->Wait(kv_worker_->ZPull({key}, &variables)); + memcpy(dev_addr, variables.data(), size); +} + +template +void Worker::DoPSEmbeddingLookup(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray &lookup_ids, + const ::ps::SArray &lens, ::ps::SArray *lookup_result, int cmd) { + kv_worker_->EmbeddingLookup(keys, lookup_ids, lens, &lookup_result, cmd); +} + +template +void Worker::InitPSParamData(const std::vector &keys, void *origin_addr, size_t size) { + ::ps::SArray addr(reinterpret_cast(origin_addr), size / sizeof(T)); + ::ps::SArray<::ps::Key> key(keys); + ::ps::SArray lens; + lens.push_back(addr.size()); + kv_worker_->Wait(kv_worker_->ZPush(key, addr, lens, kInitWeightsCmd)); + init_keys_[key[0]] = true; +} + +template +void Worker::SetOptimInputShapes(size_t key, const std::vector &shape) { + if (key_to_optim_shapes_.find(key) == key_to_optim_shapes_.end()) { + key_to_optim_shapes_[key] = {shape}; + } else { + 
key_to_optim_shapes_[key].push_back(shape); + } +} + +template +void Worker::InitPSOptimInputShapes(const size_t key) { + ::ps::SArray<::ps::Key> keys; + ::ps::SArray shape_len; + ::ps::SArray all_shape; + std::vector> shapes = key_to_optim_shapes_[key]; + for (auto shape : shapes) { + keys.push_back(key); + if (shape.size() == 0) { + shape_len.push_back(1); + all_shape.push_back(1); + } else { + shape_len.push_back(SizeToInt(shape.size())); + for (auto dim : shape) { + all_shape.push_back(static_cast(dim)); + } + } + } + MS_LOG(ERROR) << "keys:" << keys; + MS_LOG(ERROR) << "shape_len:" << shape_len; + MS_LOG(ERROR) << "all_shape:" << all_shape; + if (!init_keys_[key]) { + init_keys_[key] = true; + } + kv_worker_->PushData(keys, all_shape, shape_len, kInitOptimInputsShapeCmd); +} + +template +bool Worker::IsKeyInit(const size_t key) { + if (init_keys_.find(key) == init_keys_.end() || !init_keys_[key]) { + return false; + } + return true; +} + +template +size_t Worker::SetParamKey(const std::string ¶m_name) { + size_t key = UINT64_MAX; + if (param_to_key_.count(param_name)) { + key = param_to_key_[param_name]; + MS_LOG(INFO) << param_name << " key is already set: key value is " << key; + } else { + key = key_cnt_++; + param_to_key_[param_name] = key; + MS_LOG(INFO) << "Set key " << key << " for parameter " << param_name; + } + return key; +} + +template +size_t Worker::GetParamKey(const std::string ¶m_name) { + size_t key = kInvalidKey; + if (param_to_key_.find(param_name) != param_to_key_.end()) { + key = param_to_key_[param_name]; + MS_LOG(ERROR) << "Get key of parameter " << param_name << " key is " << key; + } + return key; +} + +template +void Worker::SetKeyOptimId(size_t key, const std::string &optimizer_name) { + key_to_optimId_[key] = Util::optimizer_id(optimizer_name); +} + +template +void Worker::InitPSOptimId(const size_t param_key) { + if (key_to_optimId_.count(param_key) == 0) { + MS_LOG(EXCEPTION) << "Can't find optimizer id of parameter key " << 
param_key; + } + int optim_id = key_to_optimId_[param_key]; + + ::ps::SArray<::ps::Key> keys = {param_key}; + ::ps::SArray optim_id_vals = {static_cast(optim_id)}; + ::ps::SArray optim_id_lens = {optim_id_vals.size()}; + kv_worker_->PushData(keys, optim_id_vals, optim_id_lens, kInitWeightToOptimIdCmd); +} + +template +void Worker::InitPSEmbeddingTable(const std::vector &keys, std::vector shapes, + const std::vector &sizes) { + bool has_init = IsKeyInit(keys[0]); + if (has_init) { + MS_LOG(DEBUG) << "The key embedding table of key " << keys[0] << " is initialized."; + return; + } + ::ps::SArray shapes_val; + for (auto dim : shapes) { + shapes_val.push_back(static_cast(dim)); + } + kv_worker_->Wait(kv_worker_->InitEmbeddingTable(::ps::SArray<::ps::Key>(keys), shapes_val, ::ps::SArray(sizes))); +} + +template +// Initialize parameters and optimizer kernels of Parameter Server. +void Worker::InitPSParamAndOptim(const std::string ¶m_name, void *param_data, size_t param_size) { + size_t param_key = GetParamKey(param_name); + if (param_key == kInvalidKey) { + MS_LOG(INFO) << "Parameter " << param_name << " has no key assigned."; + return; + } + bool init = IsKeyInit(param_key); + if (!init) { + MS_LOG(INFO) << "Init paramter and optimizer in parameter server side for " << param_name; + // No need to push embedding table data to Parameter Server. 
+ if (param_name.find("embedding_table") == std::string::npos && param_name.find("wide_w") == std::string::npos) { + InitPSParamData({param_key}, param_data, param_size); + } + InitPSOptimId(param_key); + InitPSOptimInputShapes(param_key); + } +} + +template +void Worker::AddEmbeddingTable(const ::ps::Key &key, const size_t &row_count) { + kv_worker_->AddEmbeddingTable(key, row_count); +} + +} // namespace ps +} // namespace parallel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_WORKER_H_ diff --git a/mindspore/ccsrc/frontend/parallel/ps/worker_proxy.h b/mindspore/ccsrc/frontend/parallel/ps/worker_proxy.h new file mode 100644 index 00000000000..a0f58d39a4a --- /dev/null +++ b/mindspore/ccsrc/frontend/parallel/ps/worker_proxy.h @@ -0,0 +1,311 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_WORKER_PROXY_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_WORKER_PROXY_H_ + +#include +#include +#include +#include +#include +#include "ps/ps.h" +#include "frontend/parallel/ps/util.h" + +namespace mindspore { +namespace parallel { +namespace ps { +template +class WorkerProxy : public ::ps::KVWorker { + public: + using Worker = ::ps::KVWorker; + using Callback = std::function; + using SlicedKVs = std::vector>>; + using Slicer = + std::function &send, const std::vector<::ps::Range> &ranges, SlicedKVs *sliced)>; + using ::ps::SimpleApp::obj_; + explicit WorkerProxy(int app_id, int customer_id, int lookup_customer_id) : Worker(app_id, customer_id) { + using _1 = std::placeholders::_1; + using _2 = std::placeholders::_2; + using _3 = std::placeholders::_3; + lookup_customer_ = std::unique_ptr<::ps::Customer>( + new ::ps::Customer(app_id, lookup_customer_id, std::bind(&WorkerProxy::ProcessLookupResult, this, _1))); + lookup_slicer_ = std::bind(&WorkerProxy::LookupIdSlicer, this, _1, _2, _3); + init_embedding_slicer_ = std::bind(&WorkerProxy::EmbeddingTableInitSlicer, this, _1, _2, _3); + push_slicer_ = std::bind(&WorkerProxy::PushSlicer, this, _1, _2, _3); + broadcast_slicer_ = std::bind(&WorkerProxy::BroadcastSlicer, this, _1, _2, _3); + } + ~WorkerProxy() override = default; + + void AddEmbeddingTable(const ::ps::Key &key, const size_t &row_count); + void EmbeddingLookup(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray &lookup_ids, + const ::ps::SArray &lens, ::ps::SArray *outs, int cmd = 0, const Callback &cb = nullptr, + int priority = 0); + int InitEmbeddingTable(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray &vals, + const ::ps::SArray &lens = {}, const Callback &cb = nullptr, int priority = 0); + void PushData(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray &vals, const ::ps::SArray &lens = {}, + int cmd = 0, int priority = 0); + + private: + template + int AddLookupCB(const 
::ps::SArray<::ps::Key> &keys, const ::ps::SArray &lookup_ids, C *vals, int cmd, + const Callback &cb); + void LookupIdSlicer(const ::ps::KVPairs &send, const std::vector<::ps::Range> &, + std::vector>> *sliced); + void EmbeddingTableInitSlicer(const ::ps::KVPairs &send, const std::vector<::ps::Range> &, + std::vector>> *sliced); + void PushSlicer(const ::ps::KVPairs &send, const std::vector<::ps::Range> &, + std::vector>> *sliced); + void BroadcastSlicer(const ::ps::KVPairs &send, const std::vector<::ps::Range> &, + std::vector>> *sliced); + void ProcessLookupResult(const ::ps::Message &msg); + void Send(::ps::Customer *customer, int timestamp, bool push, bool pull, int cmd, const ::ps::KVPairs &kvs, + const Slicer &slicer); + + std::unique_ptr<::ps::Customer> lookup_customer_; + std::unordered_map<::ps::Key, std::shared_ptr>> embedding_table_ranges_; + std::unordered_map>> lookup_results_; + std::mutex mutex_; + Slicer lookup_slicer_; + Slicer init_embedding_slicer_; + Slicer push_slicer_; + Slicer broadcast_slicer_; + std::unordered_map lookup_callbacks_; +}; + +template +void WorkerProxy::AddEmbeddingTable(const ::ps::Key &key, const size_t &row_count) { + uint64_t begin = 0; + uint64_t end = 0; + int server_num = ::ps::NumServers(); + for (int i = 0; i < server_num; i++) { + int local_row_cnt = Util::LocalShard(row_count, i, server_num); + if (i == 0) { + end = local_row_cnt - 1; + } else { + begin = end + 1; + end += local_row_cnt; + } + ::ps::Range range(begin, end); + if (embedding_table_ranges_.count(key) == 0) { + embedding_table_ranges_[key] = std::make_shared>(); + } + embedding_table_ranges_[key]->push_back(range); + } +} + +template +void WorkerProxy::EmbeddingLookup(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray &lookup_ids, + const ::ps::SArray &lens, ::ps::SArray *outs, int cmd, const Callback &cb, + int priority) { + int ts = AddLookupCB(keys, lookup_ids, outs, cmd, cb); + ::ps::KVPairs kvs; + kvs.keys = keys; + kvs.vals = lookup_ids; + 
kvs.lens = lens; + kvs.priority = priority; + Send(lookup_customer_.get(), ts, true, true, cmd, kvs, broadcast_slicer_); + lookup_customer_->WaitRequest(ts); +} + +template +int WorkerProxy::InitEmbeddingTable(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray &vals, + const ::ps::SArray &lens, const Callback &cb, int priority) { + int ts = obj_->NewRequest(::ps::kServerGroup); + ::ps::KVPairs kvs; + kvs.keys = keys; + kvs.vals = vals; + kvs.lens = lens; + kvs.priority = priority; + Send(obj_, ts, true, false, kInitEmbeddingsCmd, kvs, init_embedding_slicer_); + return ts; +} + +template +void WorkerProxy::PushData(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray &vals, + const ::ps::SArray &lens, int cmd, int priority) { + int ts = obj_->NewRequest(::ps::kServerGroup); + ::ps::KVPairs kvs; + kvs.keys = keys; + kvs.vals = vals; + kvs.lens = lens; + kvs.priority = priority; + Send(obj_, ts, true, false, cmd, kvs, push_slicer_); + obj_->WaitRequest(ts); +} + +template +template +int WorkerProxy::AddLookupCB(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray &lookup_ids, + C *lookup_result, int cmd, const Callback &cb) { + int ts = lookup_customer_->NewRequest(::ps::kServerGroup); + const auto &callback = [this, ts, keys, lookup_ids, lookup_result, cb]() mutable { + mutex_.lock(); + auto &kvs = lookup_results_[ts]; + mutex_.unlock(); + + size_t total_len = 0; + const auto &s = kvs[0]; + for (size_t i = 0; i < s.lens.size(); i++) { + total_len += s.lens[i]; + } + lookup_result->resize(total_len, 0); + T *result_addr = lookup_result->data(); + + for (const auto &s : kvs) { + size_t offset = 0; + for (size_t i = 0; i < s.vals.size(); i++) { + result_addr[offset++] += s.vals[i]; + } + } + + mutex_.lock(); + lookup_results_.erase(ts); + mutex_.unlock(); + if (cb) cb(); + }; + lookup_callbacks_[ts] = callback; + return ts; +} + +template +void WorkerProxy::LookupIdSlicer(const ::ps::KVPairs &send, const std::vector<::ps::Range> &, + std::vector>> *sliced) { 
+ int *data = send.lens.data(); + size_t size = send.lens.size(); + std::vector lookup_ids(data, data + size); + std::sort(lookup_ids.begin(), lookup_ids.end()); + + const Key &key = send.keys[0]; + const std::vector<::ps::Range> &ranges = *(embedding_table_ranges_[key]); + sliced->resize(ranges.size()); + + size_t index = 0; + for (size_t i = 0; i < ranges.size(); i++) { + const ::ps::Range &range = ranges[i]; + const auto &begin = range.begin(); + const auto &end = range.end(); + auto &kvs = sliced->at(i).second; + + auto lookup_id = static_cast(lookup_ids[index]); + while (lookup_id >= begin && lookup_id <= end) { + kvs.vals.push_back(lookup_id); + if (++index >= lookup_ids.size()) { + break; + } + lookup_id = static_cast(lookup_ids[index]); + } + kvs.keys.push_back(key); + kvs.lens.push_back(kvs.vals.size()); + + if (kvs.vals.size() == 0) { + sliced->at(i).first = false; + } else { + sliced->at(i).first = true; + } + } +} + +template +void WorkerProxy::EmbeddingTableInitSlicer(const ::ps::KVPairs &send, const std::vector<::ps::Range> &, + std::vector>> *sliced) { + const Key &key = send.keys[0]; + const std::vector<::ps::Range> &ranges = *(embedding_table_ranges_[key]); + sliced->resize(ranges.size()); + for (size_t i = 0; i < ranges.size(); i++) { + sliced->at(i).first = true; + sliced->at(i).second = send; + } +} + +template +void WorkerProxy::PushSlicer(const ::ps::KVPairs &send, const std::vector<::ps::Range> &, + std::vector>> *sliced) { + auto server_num = ::ps::Postoffice::Get()->num_servers(); + sliced->resize(server_num); + for (int i = 0; i < server_num; i++) { + sliced->at(i).first = true; + sliced->at(i).second = send; + } +} + +template +void WorkerProxy::BroadcastSlicer(const ::ps::KVPairs &send, const std::vector<::ps::Range> &, + std::vector>> *sliced) { + auto server_num = ::ps::Postoffice::Get()->num_servers(); + sliced->resize(server_num); + for (int i = 0; i < server_num; i++) { + sliced->at(i).first = true; + sliced->at(i).second = send; + 
} +} + +template +void WorkerProxy::ProcessLookupResult(const ::ps::Message &msg) { + int ts = msg.meta.timestamp; + if (msg.meta.pull) { + CHECK_GE(msg.data.size(), (size_t)2); + ::ps::KVPairs kvs; + kvs.keys = msg.data[0]; + kvs.vals = msg.data[1]; + if (msg.data.size() > (size_t)2) { + kvs.lens = msg.data[2]; + } + mutex_.lock(); + lookup_results_[ts].push_back(kvs); + mutex_.unlock(); + } + if (lookup_customer_->NumResponse(ts) == ::ps::Postoffice::Get()->num_servers() - 1) { + const auto &cb = lookup_callbacks_[ts]; + cb(); + lookup_callbacks_.erase(ts); + } +} + +template +void WorkerProxy::Send(::ps::Customer *customer, int timestamp, bool push, bool pull, int cmd, + const ::ps::KVPairs &kvs, const Slicer &slicer) { + SlicedKVs sliced; + slicer(kvs, ::ps::Postoffice::Get()->GetServerKeyRanges(), &sliced); + + for (size_t i = 0; i < sliced.size(); i++) { + const auto &s = sliced[i]; + if (!s.first) continue; + ::ps::Message msg; + msg.meta.app_id = customer->app_id(); + msg.meta.customer_id = customer->customer_id(); + msg.meta.request = true; + msg.meta.push = push; + msg.meta.pull = pull; + msg.meta.head = cmd; + msg.meta.timestamp = timestamp; + msg.meta.recver = ::ps::Postoffice::Get()->ServerRankToID(i); + msg.meta.priority = kvs.priority; + const auto &kvs = s.second; + if (kvs.keys.size()) { + msg.AddData(kvs.keys); + msg.AddData(kvs.vals); + if (kvs.lens.size()) { + msg.AddData(kvs.lens); + } + } + ::ps::Postoffice::Get()->van()->Send(msg); + } +} +} // namespace ps +} // namespace parallel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_WORKER_PROXY_H_ diff --git a/mindspore/ccsrc/parallel/status.h b/mindspore/ccsrc/frontend/parallel/status.h similarity index 100% rename from mindspore/ccsrc/parallel/status.h rename to mindspore/ccsrc/frontend/parallel/status.h diff --git a/mindspore/ccsrc/parallel/step_auto_parallel.cc b/mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc similarity index 97% rename from 
mindspore/ccsrc/parallel/step_auto_parallel.cc rename to mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc index 894177df8d2..8d54eb454a9 100644 --- a/mindspore/ccsrc/parallel/step_auto_parallel.cc +++ b/mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "parallel/step_auto_parallel.h" +#include "frontend/parallel/step_auto_parallel.h" #include #include @@ -28,23 +28,23 @@ #include #include "ir/anf.h" -#include "ir/param_value_py.h" +#include "ir/param_value.h" #include "ir/tensor.h" -#include "optimizer/opt.h" -#include "optimizer/optimizer.h" -#include "parallel/auto_parallel/dp_algo_costmodel.h" -#include "parallel/auto_parallel/edge_costmodel.h" -#include "parallel/auto_parallel/graph_costmodel.h" -#include "parallel/auto_parallel/rec_core/rec_generate_strategy.h" -#include "parallel/auto_parallel/rec_core/rec_parse_graph.h" -#include "parallel/auto_parallel/rec_core/rec_partition.h" -#include "parallel/context.h" -#include "parallel/ops_info/tmp_identity_info.h" -#include "parallel/ops_info/reshape_info.h" -#include "parallel/step_parallel.h" -#include "parallel/strategy_checkpoint/parallel_strategy_checkpoint.h" -#include "pipeline/parse/python_adapter.h" -#include "pipeline/pipeline.h" +#include "frontend/optimizer/opt.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/parallel/auto_parallel/dp_algo_costmodel.h" +#include "frontend/parallel/auto_parallel/edge_costmodel.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_parse_graph.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_partition.h" +#include "frontend/parallel/context.h" +#include "frontend/parallel/ops_info/tmp_identity_info.h" +#include "frontend/parallel/ops_info/reshape_info.h" +#include "frontend/parallel/step_parallel.h" +#include 
"frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "pipeline/jit/pipeline.h" namespace mindspore { namespace parallel { @@ -123,9 +123,8 @@ std::vector ExtractInputParameterByNode(const CNodePtr &node) { if (input->isa()) { auto input_parameter = input->cast(); if (input_parameter->has_default()) { - auto param_value = std::dynamic_pointer_cast(input_parameter->default_param()); - bool require_grad = py::cast(parse::python_adapter::GetPyObjAttr(param_value->value(), "requires_grad")); - is_parameter.push_back(require_grad); + bool requires_grad = input_parameter->default_param()->requires_grad(); + is_parameter.push_back(requires_grad); } else { is_parameter.push_back(false); } @@ -799,9 +798,8 @@ void AugmentCostGraph(const std::vector &all_nodes) { auto casted_target_parameter = target_parameter->cast(); MS_EXCEPTION_IF_NULL(casted_target_parameter); if (casted_target_parameter->has_default()) { - auto param_value = std::dynamic_pointer_cast(casted_target_parameter->default_param()); - bool require_grad = py::cast(parse::python_adapter::GetPyObjAttr(param_value->value(), "requires_grad")); - is_parameter.push_back(require_grad); + bool requires_grad = casted_target_parameter->default_param()->requires_grad(); + is_parameter.push_back(requires_grad); } else { is_parameter.push_back(false); } diff --git a/mindspore/ccsrc/parallel/step_auto_parallel.h b/mindspore/ccsrc/frontend/parallel/step_auto_parallel.h similarity index 95% rename from mindspore/ccsrc/parallel/step_auto_parallel.h rename to mindspore/ccsrc/frontend/parallel/step_auto_parallel.h index c923e5770f3..f87d49b7362 100644 --- a/mindspore/ccsrc/parallel/step_auto_parallel.h +++ b/mindspore/ccsrc/frontend/parallel/step_auto_parallel.h @@ -22,9 +22,9 @@ #include #include #include "ir/anf.h" -#include "optimizer/opt.h" -#include "parallel/status.h" -#include "pipeline/pipeline.h" +#include "frontend/optimizer/opt.h" +#include 
"frontend/parallel/status.h" +#include "pipeline/jit/pipeline.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/step_parallel.cc b/mindspore/ccsrc/frontend/parallel/step_parallel.cc similarity index 97% rename from mindspore/ccsrc/parallel/step_parallel.cc rename to mindspore/ccsrc/frontend/parallel/step_parallel.cc index 7d1200b1904..6b9cfd9d370 100644 --- a/mindspore/ccsrc/parallel/step_parallel.cc +++ b/mindspore/ccsrc/frontend/parallel/step_parallel.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "parallel/step_parallel.h" +#include "frontend/parallel/step_parallel.h" #include #include @@ -28,22 +28,22 @@ #include #include "ir/tensor.h" -#include "ir/param_value_py.h" -#include "operator/ops.h" -#include "optimizer/optimizer.h" -#include "parallel/auto_parallel/graph_costmodel.h" -#include "parallel/context.h" -#include "parallel/device_manager.h" -#include "parallel/dynamic_creator.h" -#include "parallel/graph_util/generate_graph.h" -#include "parallel/graph_util/graph_info.h" -#include "parallel/graph_util/node_info.h" -#include "parallel/node_check.h" -#include "parallel/ops_info/matmul_info.h" -#include "parallel/strategy_checkpoint/parallel_strategy_checkpoint.h" +#include "ir/param_value.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/context.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/dynamic_creator.h" +#include "frontend/parallel/graph_util/generate_graph.h" +#include "frontend/parallel/graph_util/graph_info.h" +#include "frontend/parallel/graph_util/node_info.h" +#include "frontend/parallel/node_check.h" +#include "frontend/parallel/ops_info/matmul_info.h" +#include "frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h" #include "utils/comm_manager.h" #include "utils/symbolic.h" -#include "pipeline/static_analysis/prim.h" 
+#include "pipeline/jit/static_analysis/prim.h" using mindspore::tensor::Tensor; @@ -536,7 +536,7 @@ std::vector ReplaceOpInput(const Operator &replace_op, const std::st } std::vector replace_input = {NewValueNode(pyop_instance), node->input(1)}; auto prim = GetValueNode(node->input(0)); - if (prim->name() == GATHERV2 || prim->name() == SPARSE_GATHERV2) { + if (prim->name() == EMBEDDING_LOOKUP) { replace_input = {NewValueNode(pyop_instance), node->input(1), node->input(2)}; } if (!params.empty()) { @@ -611,6 +611,12 @@ void StepReplaceOp(OperatorVector replace_op, const CNodePtr &node) { ScopePtr scope = node->scope(); MS_EXCEPTION_IF_NULL(scope); replace_node->set_scope(scope); + PrimitivePtr prim = GetValueNode(replace_node->input(0)); + if (prim->name() == EMBEDDING_LOOKUP) { + auto attrs = prim->attrs(); + attrs[TARGET] = MakeValue(CPU); + (void)prim->SetAttrs(attrs); + } if (index == replace_op.size() - 1) { (void)replace_node->set_operator_info(node->operator_info()); } @@ -1298,9 +1304,7 @@ bool ParameterIsCloned(const FuncGraphPtr &root, const AnfNodePtr ¶meter_nod return false; } - auto param_value = std::dynamic_pointer_cast(cloned_parameter->default_param()); - py::object clone_info = parse::python_adapter::GetPyObjAttr(param_value->value(), CLONE_INFO); - bool cloned = py::cast(parse::python_adapter::GetPyObjAttr(clone_info, CLONED)); + bool cloned = cloned_parameter->default_param()->cloned(); if (!cloned) { return false; } @@ -1321,9 +1325,7 @@ void SetClonedTensorShapeForOptimizer(const FuncGraphPtr &root) { } // get the cloned index - auto param_value = std::dynamic_pointer_cast(cloned_parameter->default_param()); - py::object cloned_info = parse::python_adapter::GetPyObjAttr(param_value->value(), CLONE_INFO); - int32_t cloned_index = py::cast(parse::python_adapter::GetPyObjAttr(cloned_info, CLONED_INDEX)); + int32_t cloned_index = cloned_parameter->default_param()->cloned_index(); // find the be cloned parameter bool found_be_cloned_parameter = 
false; @@ -1337,21 +1339,17 @@ void SetClonedTensorShapeForOptimizer(const FuncGraphPtr &root) { continue; } - auto param_value_cloned = std::dynamic_pointer_cast(be_cloned_parameter->default_param()); - py::object be_cloned_info = parse::python_adapter::GetPyObjAttr(param_value_cloned->value(), CLONE_INFO); - if (!py::cast(parse::python_adapter::GetPyObjAttr(be_cloned_info, BE_CLONED))) { + const auto ¶m_value_cloned = be_cloned_parameter->default_param(); + if (!param_value_cloned->be_cloned()) { continue; } // get the be cloned index - py::list be_cloned_index = parse::python_adapter::GetPyObjAttr(be_cloned_info, BE_CLONED_INDEX); - for (auto &index : be_cloned_index) { - if (cloned_index == py::cast(index)) { - found_be_cloned_parameter = true; - cloned_from_parameter = be_cloned_parameter; - cloned_from_node = be_cloned_parameter_node; - break; - } + auto &be_cloned_index = param_value_cloned->be_cloned_index(); + if (std::find(be_cloned_index.begin(), be_cloned_index.end(), cloned_index) != be_cloned_index.end()) { + found_be_cloned_parameter = true; + cloned_from_parameter = be_cloned_parameter; + cloned_from_node = be_cloned_parameter_node; } } @@ -1375,7 +1373,6 @@ void SetClonedTensorShapeForOptimizer(const FuncGraphPtr &root) { std::string env = common::GetEnv("SLICE_ENV"); if (!env.empty()) { MS_LOG(INFO) << "Slice tensors shape will be configured from env:" << env; - abstract::InitUndeterminedFromEnv(env); } } @@ -2090,9 +2087,9 @@ std::string NodeParameterName(const CNodePtr &node) { if (input->isa()) { auto input_parameter = input->cast(); if (input_parameter->has_default()) { - auto param_value = std::dynamic_pointer_cast(input_parameter->default_param()); - if (py::cast(parse::python_adapter::GetPyObjAttr(param_value->value(), REQUIRES_GRAD))) { - return py::cast(parse::python_adapter::GetPyObjAttr(param_value->value(), PARAM_NAME)); + const auto ¶m_value = input_parameter->default_param(); + if (param_value->requires_grad()) { + return 
param_value->name(); } } } @@ -2120,6 +2117,9 @@ void CheckpointStrategy(const FuncGraphPtr &func_graph) { MS_EXCEPTION_IF_NULL(prim); OperatorInfoPtr operator_info = cnode->operator_info(); if (operator_info) { + if (operator_info->name().find(RESHAPEINFO) != std::string::npos) { + continue; + } StrategyPtr strategyPtr = operator_info->strategy(); MS_EXCEPTION_IF_NULL(node->scope()); stra_map[param_name] = strategyPtr; diff --git a/mindspore/ccsrc/parallel/step_parallel.h b/mindspore/ccsrc/frontend/parallel/step_parallel.h similarity index 97% rename from mindspore/ccsrc/parallel/step_parallel.h rename to mindspore/ccsrc/frontend/parallel/step_parallel.h index 308473dcd74..f9fe67ea6be 100644 --- a/mindspore/ccsrc/parallel/step_parallel.h +++ b/mindspore/ccsrc/frontend/parallel/step_parallel.h @@ -27,9 +27,9 @@ #include #include "./common.h" -#include "optimizer/opt.h" -#include "parallel/strategy.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/optimizer/opt.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" using OperatorInfoPtr = std::shared_ptr; diff --git a/mindspore/ccsrc/parallel/strategy.h b/mindspore/ccsrc/frontend/parallel/strategy.h similarity index 98% rename from mindspore/ccsrc/parallel/strategy.h rename to mindspore/ccsrc/frontend/parallel/strategy.h index bc62dd53087..ca01164a6a4 100644 --- a/mindspore/ccsrc/parallel/strategy.h +++ b/mindspore/ccsrc/frontend/parallel/strategy.h @@ -23,7 +23,7 @@ #include #include -#include "parallel/status.h" +#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc b/mindspore/ccsrc/frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc similarity index 97% rename from mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc rename to 
mindspore/ccsrc/frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc index de10f4beb40..bf7c4e29abf 100644 --- a/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc +++ b/mindspore/ccsrc/frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "parallel/strategy_checkpoint/parallel_strategy_checkpoint.h" +#include "frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h" #include #include @@ -93,6 +93,7 @@ Status StrategyCheckpoint::Save(const StrategyMap &strategy_map) { parallel_strategy_item->set_node_name(node_stra.first); straspb::ParallelStrategys *parallel_strategys = parallel_strategy_item->mutable_parallel_strategys(); MS_EXCEPTION_IF_NULL(parallel_strategys); + MS_EXCEPTION_IF_NULL(node_stra.second); parallel_strategys->set_stage(IntToUint(node_stra.second->GetInputStage())); for (auto &dims : node_stra.second->GetInputDim()) { straspb::ParallelStrategy *parallel_strategy = parallel_strategys->add_parallel_strategy(); diff --git a/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h b/mindspore/ccsrc/frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h similarity index 93% rename from mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h rename to mindspore/ccsrc/frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h index a758a9e7bb4..67cbb92ee29 100644 --- a/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h +++ b/mindspore/ccsrc/frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h @@ -19,9 +19,9 @@ #include #include -#include "parallel/ops_info/ops_utils.h" -#include "parallel/strategy.h" -#include "parallel/context.h" +#include "frontend/parallel/ops_info/ops_utils.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/context.h" namespace mindspore { namespace parallel { diff --git 
a/mindspore/ccsrc/parallel/tensor_layout/arrangement.cc b/mindspore/ccsrc/frontend/parallel/tensor_layout/arrangement.cc similarity index 98% rename from mindspore/ccsrc/parallel/tensor_layout/arrangement.cc rename to mindspore/ccsrc/frontend/parallel/tensor_layout/arrangement.cc index 235ab00302d..cff3d53a888 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/arrangement.cc +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/arrangement.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "parallel/tensor_layout/arrangement.h" +#include "frontend/parallel/tensor_layout/arrangement.h" #include #include #include #include "common/utils.h" -#include "parallel/status.h" -#include "parallel/tensor_layout/shape_util.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/shape_util.h" #include "utils/convert_utils.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/parallel/tensor_layout/arrangement.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/arrangement.h similarity index 95% rename from mindspore/ccsrc/parallel/tensor_layout/arrangement.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/arrangement.h index ca71b05c915..ab807fb20a1 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/arrangement.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/arrangement.h @@ -23,8 +23,8 @@ #include #include #include -#include "parallel/status.h" -#include "parallel/tensor_layout/array.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/array.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/array.cc b/mindspore/ccsrc/frontend/parallel/tensor_layout/array.cc similarity index 95% rename from mindspore/ccsrc/parallel/tensor_layout/array.cc rename to mindspore/ccsrc/frontend/parallel/tensor_layout/array.cc index ef358e7cded..4e1f467793e 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/array.cc +++ 
b/mindspore/ccsrc/frontend/parallel/tensor_layout/array.cc @@ -14,9 +14,9 @@ * limitations under the License. */ -#include "parallel/tensor_layout/array.h" +#include "frontend/parallel/tensor_layout/array.h" #include -#include "parallel/status.h" +#include "frontend/parallel/status.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/tensor_layout/array.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/array.h similarity index 97% rename from mindspore/ccsrc/parallel/tensor_layout/array.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/array.h index 5aa3bdb1389..13b3982a189 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/array.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/array.h @@ -22,7 +22,7 @@ #include #include #include -#include "parallel/status.h" +#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/construct_operator.cc b/mindspore/ccsrc/frontend/parallel/tensor_layout/construct_operator.cc similarity index 99% rename from mindspore/ccsrc/parallel/tensor_layout/construct_operator.cc rename to mindspore/ccsrc/frontend/parallel/tensor_layout/construct_operator.cc index b5ca5ed60a1..9395d3df89a 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/construct_operator.cc +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/construct_operator.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "parallel/tensor_layout/construct_operator.h" +#include "frontend/parallel/tensor_layout/construct_operator.h" #include #include diff --git a/mindspore/ccsrc/parallel/tensor_layout/construct_operator.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/construct_operator.h similarity index 95% rename from mindspore/ccsrc/parallel/tensor_layout/construct_operator.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/construct_operator.h index 1a69638fb65..b06d70af364 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/construct_operator.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/construct_operator.h @@ -22,8 +22,8 @@ #include #include "ir/value.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/status.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/layout_transfer.cc b/mindspore/ccsrc/frontend/parallel/tensor_layout/layout_transfer.cc similarity index 92% rename from mindspore/ccsrc/parallel/tensor_layout/layout_transfer.cc rename to mindspore/ccsrc/frontend/parallel/tensor_layout/layout_transfer.cc index 84c0580ba87..d5d34a484f4 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/layout_transfer.cc +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/layout_transfer.cc @@ -14,9 +14,9 @@ * limitations under the License. 
*/ -#include "parallel/tensor_layout/layout_transfer.h" +#include "frontend/parallel/tensor_layout/layout_transfer.h" #include "common/utils.h" -#include "parallel/status.h" +#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/layout_transfer.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/layout_transfer.h similarity index 93% rename from mindspore/ccsrc/parallel/tensor_layout/layout_transfer.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/layout_transfer.h index c4da4b728f9..01c56fc7cff 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/layout_transfer.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/layout_transfer.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_PARALLEL_TENSOR_LAYOUT_LAYOUT_TRANSFER_H_ #include -#include "parallel/status.h" -#include "parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/map.cc b/mindspore/ccsrc/frontend/parallel/tensor_layout/map.cc similarity index 97% rename from mindspore/ccsrc/parallel/tensor_layout/map.cc rename to mindspore/ccsrc/frontend/parallel/tensor_layout/map.cc index 669920fc446..184f0c75306 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/map.cc +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/map.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include "parallel/tensor_layout/map.h" +#include "frontend/parallel/tensor_layout/map.h" #include #include #include #include "common/utils.h" -#include "parallel/status.h" -#include "parallel/tensor_layout/shape_util.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/shape_util.h" #include "utils/convert_utils.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/parallel/tensor_layout/map.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/map.h similarity index 91% rename from mindspore/ccsrc/parallel/tensor_layout/map.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/map.h index 8c8bba27750..3d299d4b90f 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/map.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/map.h @@ -22,9 +22,9 @@ #include #include #include -#include "parallel/status.h" -#include "parallel/tensor_layout/arrangement.h" -#include "parallel/tensor_layout/array.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/arrangement.h" +#include "frontend/parallel/tensor_layout/array.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/redistribution_layout_transfer.cc b/mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_layout_transfer.cc similarity index 91% rename from mindspore/ccsrc/parallel/tensor_layout/redistribution_layout_transfer.cc rename to mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_layout_transfer.cc index 7ed07ac02ec..a5a488d8079 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/redistribution_layout_transfer.cc +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_layout_transfer.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "parallel/tensor_layout/redistribution_layout_transfer.h" -#include "parallel/status.h" -#include "parallel/tensor_layout/reshape_layout_transfer.h" -#include "parallel/tensor_layout/shape_util.h" +#include "frontend/parallel/tensor_layout/redistribution_layout_transfer.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/reshape_layout_transfer.h" +#include "frontend/parallel/tensor_layout/shape_util.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/redistribution_layout_transfer.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_layout_transfer.h similarity index 88% rename from mindspore/ccsrc/parallel/tensor_layout/redistribution_layout_transfer.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_layout_transfer.h index 7b57f46dd6e..0347b6423aa 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/redistribution_layout_transfer.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_layout_transfer.h @@ -18,9 +18,9 @@ #define MINDSPORE_CCSRC_PARALLEL_TENSOR_LAYOUT_REDISTRIBUTION_LAYOUT_TRANSFER_H_ #include -#include "parallel/status.h" -#include "parallel/tensor_layout/layout_transfer.h" -#include "parallel/tensor_layout/reshape_layout_transfer.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/layout_transfer.h" +#include "frontend/parallel/tensor_layout/reshape_layout_transfer.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.cc b/mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_operator_infer.cc similarity index 98% rename from mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.cc rename to mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_operator_infer.cc index 946620ec4c2..6ac24418b7d 100644 --- 
a/mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.cc +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_operator_infer.cc @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "parallel/tensor_layout/redistribution_operator_infer.h" +#include "frontend/parallel/tensor_layout/redistribution_operator_infer.h" #include -#include "parallel/device_manager.h" +#include "frontend/parallel/device_manager.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_operator_infer.h similarity index 95% rename from mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_operator_infer.h index 37a8ac3d9ee..66cdb3f9254 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/redistribution_operator_infer.h @@ -23,8 +23,8 @@ #include #include -#include "parallel/tensor_layout/construct_operator.h" -#include "parallel/tensor_layout/redistribution_layout_transfer.h" +#include "frontend/parallel/tensor_layout/construct_operator.h" +#include "frontend/parallel/tensor_layout/redistribution_layout_transfer.h" #include "utils/convert_utils.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/reshape_layout_transfer.cc b/mindspore/ccsrc/frontend/parallel/tensor_layout/reshape_layout_transfer.cc similarity index 96% rename from mindspore/ccsrc/parallel/tensor_layout/reshape_layout_transfer.cc rename to mindspore/ccsrc/frontend/parallel/tensor_layout/reshape_layout_transfer.cc index 4c66befd786..98f7cf78fa9 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/reshape_layout_transfer.cc +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/reshape_layout_transfer.cc @@ -14,9 +14,9 @@ * limitations 
under the License. */ -#include "parallel/tensor_layout/reshape_layout_transfer.h" -#include "parallel/status.h" -#include "parallel/tensor_layout/shape_util.h" +#include "frontend/parallel/tensor_layout/reshape_layout_transfer.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/shape_util.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/reshape_layout_transfer.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/reshape_layout_transfer.h similarity index 95% rename from mindspore/ccsrc/parallel/tensor_layout/reshape_layout_transfer.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/reshape_layout_transfer.h index ed62cb59dad..f9ebe9e32bc 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/reshape_layout_transfer.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/reshape_layout_transfer.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_PARALLEL_TENSOR_LAYOUT_RESHAPE_LAYOUT_TRANSFER_H_ #include -#include "parallel/status.h" -#include "parallel/tensor_layout/layout_transfer.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/layout_transfer.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/shape_util.cc b/mindspore/ccsrc/frontend/parallel/tensor_layout/shape_util.cc similarity index 98% rename from mindspore/ccsrc/parallel/tensor_layout/shape_util.cc rename to mindspore/ccsrc/frontend/parallel/tensor_layout/shape_util.cc index e8f208708cf..83282d16b31 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/shape_util.cc +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/shape_util.cc @@ -14,9 +14,9 @@ * limitations under the License. 
*/ -#include "parallel/tensor_layout/shape_util.h" +#include "frontend/parallel/tensor_layout/shape_util.h" #include -#include "parallel/status.h" +#include "frontend/parallel/status.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/tensor_layout/shape_util.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/shape_util.h similarity index 99% rename from mindspore/ccsrc/parallel/tensor_layout/shape_util.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/shape_util.h index 2ec21f3881e..49dd39ffd6b 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/shape_util.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/shape_util.h @@ -23,7 +23,7 @@ #include #include -#include "parallel/status.h" +#include "frontend/parallel/status.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/tensor_info.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_info.h similarity index 94% rename from mindspore/ccsrc/parallel/tensor_layout/tensor_info.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_info.h index 0eee736cea9..fc78b1f59c1 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/tensor_info.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_info.h @@ -22,9 +22,9 @@ #include #include -#include "parallel/device_matrix.h" -#include "parallel/status.h" -#include "parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/tensor_layout.cc b/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_layout.cc similarity index 98% rename from mindspore/ccsrc/parallel/tensor_layout/tensor_layout.cc rename to mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_layout.cc index f3498065f29..b9c6cc78dec 100644 --- 
a/mindspore/ccsrc/parallel/tensor_layout/tensor_layout.cc +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_layout.cc @@ -14,15 +14,15 @@ * limitations under the License. */ -#include "parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" #include #include #include "common/utils.h" #include "ir/value.h" -#include "parallel/device_matrix.h" -#include "parallel/status.h" -#include "parallel/tensor_layout/array.h" -#include "parallel/tensor_layout/shape_util.h" +#include "frontend/parallel/device_matrix.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/array.h" +#include "frontend/parallel/tensor_layout/shape_util.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/tensor_layout/tensor_layout.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_layout.h similarity index 94% rename from mindspore/ccsrc/parallel/tensor_layout/tensor_layout.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_layout.h index f51ed4e3e0a..a9fdc9610c8 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/tensor_layout.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_layout.h @@ -22,10 +22,10 @@ #include #include #include -#include "parallel/device_manager.h" -#include "parallel/status.h" -#include "parallel/tensor_layout/arrangement.h" -#include "parallel/tensor_layout/map.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/arrangement.h" +#include "frontend/parallel/tensor_layout/map.h" #include "utils/convert_utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.cc b/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc similarity index 98% rename from mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.cc rename to 
mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc index 7824c21f3d5..43bb3307878 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.cc +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" #include #include #include #include "common/utils.h" -#include "parallel/status.h" -#include "parallel/tensor_layout/shape_util.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/shape_util.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.h b/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.h similarity index 91% rename from mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.h rename to mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.h index d1f46108bbc..df4bd1570fc 100644 --- a/mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.h +++ b/mindspore/ccsrc/frontend/parallel/tensor_layout/tensor_redistribution.h @@ -25,11 +25,11 @@ #include #include "ir/value.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/status.h" -#include "parallel/tensor_layout/construct_operator.h" -#include "parallel/tensor_layout/redistribution_operator_infer.h" -#include "parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/status.h" +#include "frontend/parallel/tensor_layout/construct_operator.h" +#include "frontend/parallel/tensor_layout/redistribution_operator_infer.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/gvar/typeid_manager.cc b/mindspore/ccsrc/gvar/typeid_manager.cc index f40052411ab..bc74f3a0df0 100644 --- 
a/mindspore/ccsrc/gvar/typeid_manager.cc +++ b/mindspore/ccsrc/gvar/typeid_manager.cc @@ -20,7 +20,7 @@ #include #include -#include "ir/base.h" +#include "base/base.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/CMakeLists.txt similarity index 86% rename from mindspore/ccsrc/dataset/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/CMakeLists.txt index 9238be93f29..df9729c4ee1 100644 --- a/mindspore/ccsrc/dataset/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/CMakeLists.txt @@ -34,11 +34,12 @@ endif () ########### Set up the include directories ########################### include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc) -include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/device/ascend/platform) +include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/runtime/device/ascend/platform) include_directories(${CMAKE_BINARY_DIR}) # for protobuf generated .h -include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/mindrecord/include) +include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/mindrecord/include) +include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include) ###################################################################### ####################### Flags ######################################## @@ -46,6 +47,8 @@ include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/mindrecord/include) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,-rpath,$ORIGIN:$ORIGIN/lib") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=default") +ms_build_flatbuffers("engine/cache/de_tensor.fbs" ${CMAKE_CURRENT_SOURCE_DIR} generated_engine_files ${CMAKE_BINARY_DIR}) + ################## Include sub-modules ############################### add_subdirectory(util) add_subdirectory(core) @@ -54,7 +57,7 @@ add_subdirectory(engine) add_subdirectory(api) add_subdirectory(text) ###################################################################### -add_dependencies(core 
utils) +add_dependencies(utils core) add_dependencies(kernels-image core) add_dependencies(kernels-data core) add_dependencies(kernels core) @@ -67,7 +70,10 @@ add_dependencies(engine-gnn core) add_dependencies(engine core) add_dependencies(text core) add_dependencies(text-kernels core) -add_dependencies(APItoPython core) +add_dependencies(cpp-API core) +if (ENABLE_PYTHON) + add_dependencies(APItoPython core) +endif() if (ENABLE_TDTQUE) add_dependencies(engine-tdt core) endif () @@ -78,24 +84,34 @@ set(submodules $ $ $ - $ + $ $ $ $ $ $ $ + $ + $ $ $ $ ) +if (ENABLE_PYTHON) + set(submodules + ${submodules} + $) +endif() + if (ENABLE_TDTQUE) add_library(_c_dataengine SHARED ${submodules} $) else () add_library(_c_dataengine SHARED ${submodules}) endif () +add_dependencies(_c_dataengine generated_engine_files) + set_target_properties(_c_dataengine PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}" SUFFIX "${PYTHON_MODULE_EXTENSION}" @@ -126,7 +142,7 @@ endif () add_dependencies(_c_dataengine _c_mindrecord) if (${CMAKE_SYSTEM_NAME} MATCHES "Windows") - set(MINDRECORD_LINK_OBJECT ${CMAKE_BINARY_DIR}/mindspore/ccsrc/mindrecord/CMakeFiles/_c_mindrecord.dir/objects.a) + set(MINDRECORD_LINK_OBJECT ${CMAKE_BINARY_DIR}/mindspore/ccsrc/minddata/mindrecord/CMakeFiles/_c_mindrecord.dir/objects.a) target_link_libraries(_c_dataengine PRIVATE _c_mindrecord ${MINDRECORD_LINK_OBJECT} mindspore::sqlite) else() target_link_libraries(_c_dataengine PRIVATE _c_mindrecord) diff --git a/mindspore/ccsrc/minddata/dataset/api/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/api/CMakeLists.txt new file mode 100644 index 00000000000..ae0b9cc28ed --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/api/CMakeLists.txt @@ -0,0 +1,16 @@ +file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") +set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) +if (ENABLE_PYTHON) + add_library(APItoPython OBJECT + de_pipeline.cc + 
python_bindings.cc + ) + target_include_directories(APItoPython PRIVATE ${pybind11_INCLUDE_DIRS}) +endif() + +add_library(cpp-API OBJECT + datasets.cc + iterator.cc + transforms.cc + samplers.cc + ) diff --git a/mindspore/ccsrc/minddata/dataset/api/datasets.cc b/mindspore/ccsrc/minddata/dataset/api/datasets.cc new file mode 100644 index 00000000000..3072a62dc91 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/api/datasets.cc @@ -0,0 +1,446 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/include/transforms.h" +#include "minddata/dataset/include/samplers.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/engine/datasetops/source/image_folder_op.h" +#include "minddata/dataset/engine/datasetops/source/mnist_op.h" +#include "minddata/dataset/engine/datasetops/source/cifar_op.h" +#include "minddata/dataset/engine/datasetops/batch_op.h" +#include "minddata/dataset/engine/datasetops/map_op.h" +#include "minddata/dataset/engine/datasetops/repeat_op.h" +#include "minddata/dataset/engine/datasetops/shuffle_op.h" +#include "minddata/dataset/engine/datasetops/project_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" + +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/util/random.h" + +namespace mindspore { +namespace dataset { +namespace api { + +#define RETURN_NULL_IF_ERROR(_s) \ + do { \ + Status __rc = (_s); \ + if (__rc.IsError()) { \ + return nullptr; \ + } \ + } while (false) + +// Function to create the iterator, which will build and launch the execution tree. 
+std::shared_ptr Dataset::CreateIterator() { + std::shared_ptr iter; + try { + iter = std::make_shared(); + Status rc = iter->BuildAndLaunchTree(shared_from_this()); + if (rc.IsError()) { + MS_LOG(ERROR) << "CreateIterator failed."; + return nullptr; + } + + return iter; + } catch (const std::exception &err) { + MS_LOG(ERROR) << "CreateIterator: Iterator exception caught: " << err.what(); + return nullptr; + } + + return iter; +} + +// Constructor +Dataset::Dataset() { + // Fetch some default value from config manager + std::shared_ptr cfg = GlobalContext::config_manager(); + num_workers_ = cfg->num_parallel_workers(); + rows_per_buffer_ = cfg->rows_per_buffer(); + connector_que_size_ = cfg->op_connector_size(); +} + +// Function to create a ImageFolderDataset. +std::shared_ptr ImageFolder(std::string dataset_dir, bool decode, + std::shared_ptr sampler, std::set extensions, + std::map class_indexing) { + // This arg is exist in ImageFolderOp, but not externalized (in Python API). The default value is false. + bool recursive = false; + + // Create logical representation of ImageFolderDataset. + auto ds = std::make_shared(dataset_dir, decode, sampler, recursive, extensions, class_indexing); + + // Call derived class validation method. + return ds->ValidateParams() ? ds : nullptr; +} + +// Function to create a MnistDataset. +std::shared_ptr Mnist(std::string dataset_dir, std::shared_ptr sampler) { + auto ds = std::make_shared(dataset_dir, sampler); + + // Call derived class validation method. + return ds->ValidateParams() ? ds : nullptr; +} + +// Function to create a Cifar10Dataset. +std::shared_ptr Cifar10(const std::string &dataset_dir, int32_t num_samples, + std::shared_ptr sampler) { + auto ds = std::make_shared(dataset_dir, num_samples, sampler); + + // Call derived class validation method. + return ds->ValidateParams() ? 
ds : nullptr; +} + +// Function to create a Batch dataset +std::shared_ptr Dataset::Batch(int32_t batch_size, bool drop_remainder) { + // Default values + std::vector cols_to_map = {}; + std::map>> pad_map; + bool pad = false; + auto ds = std::make_shared(batch_size, drop_remainder, pad, cols_to_map, pad_map); + + if (!ds->ValidateParams()) { + return nullptr; + } + + ds->children.push_back(shared_from_this()); + + return ds; +} + +// Function to create Repeat dataset. +std::shared_ptr Dataset::Repeat(int32_t count) { + // Workaround for repeat == 1, do not inject repeat. + if (count == 1) { + return shared_from_this(); + } + + auto ds = std::make_shared(count); + + if (!ds->ValidateParams()) { + return nullptr; + } + + ds->children.push_back(shared_from_this()); + + return ds; +} + +// Function to create a Map dataset. +std::shared_ptr Dataset::Map(std::vector> operations, + std::vector input_columns, + std::vector output_columns, + const std::vector &project_columns) { + auto ds = std::make_shared(operations, input_columns, output_columns, project_columns); + + if (!ds->ValidateParams()) { + return nullptr; + } + + ds->children.push_back(shared_from_this()); + + return ds; +} + +// Function to create a ShuffleOp +std::shared_ptr Dataset::Shuffle(int32_t shuffle_size) { + // Pass in reshuffle_each_epoch with true + auto ds = std::make_shared(shuffle_size, true); + + if (!ds->ValidateParams()) { + return nullptr; + } + + ds->children.push_back(shared_from_this()); + + return ds; +} + +// Function to create a ProjectDataset. +std::shared_ptr Dataset::Project(const std::vector &columns) { + auto ds = std::make_shared(columns); + // Call derived class validation method. + if (!ds->ValidateParams()) { + return nullptr; + } + + ds->children.push_back(shared_from_this()); + + return ds; +} + +// Helper function to create default RandomSampler. +std::shared_ptr CreateDefaultSampler() { + int32_t num_samples = 0; // 0 means to sample all ids. 
+ bool replacement = false; + return std::make_shared(replacement, num_samples); +} + +/* ####################################### Derived Dataset classes ################################# */ + +ImageFolderDataset::ImageFolderDataset(std::string dataset_dir, bool decode, std::shared_ptr sampler, + bool recursive, std::set extensions, + std::map class_indexing) + : dataset_dir_(dataset_dir), + decode_(decode), + sampler_(sampler), + recursive_(recursive), + class_indexing_(class_indexing), + exts_(extensions) {} + +bool ImageFolderDataset::ValidateParams() { + if (dataset_dir_.empty()) { + MS_LOG(ERROR) << "No dataset path is specified."; + return false; + } + + return true; +} + +std::shared_ptr>> ImageFolderDataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + + // If user does not specify Sampler, create a default sampler, i.e., RandomSampler. + if (sampler_ == nullptr) { + sampler_ = CreateDefaultSampler(); + } + + // Do internal Schema generation. + // This arg is exist in ImageFolderOp, but not externalized (in Python API). 
+ std::unique_ptr schema = std::make_unique(); + TensorShape scalar = TensorShape::CreateScalar(); + RETURN_NULL_IF_ERROR( + schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); + RETURN_NULL_IF_ERROR( + schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar))); + node_ops.push_back(std::make_shared(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, + recursive_, decode_, exts_, class_indexing_, std::move(schema), + std::move(sampler_->Build()))); + return std::make_shared>>(node_ops); +} + +MnistDataset::MnistDataset(std::string dataset_dir, std::shared_ptr sampler) + : dataset_dir_(dataset_dir), sampler_(sampler) {} + +bool MnistDataset::ValidateParams() { + if (dataset_dir_.empty()) { + MS_LOG(ERROR) << "No dataset path is specified."; + return false; + } + + return true; +} + +std::shared_ptr>> MnistDataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + + // If user does not specify Sampler, create a default sampler, i.e., RandomSampler. + if (sampler_ == nullptr) { + sampler_ = CreateDefaultSampler(); + } + + // Do internal Schema generation. 
+ auto schema = std::make_unique(); + RETURN_NULL_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1))); + TensorShape scalar = TensorShape::CreateScalar(); + RETURN_NULL_IF_ERROR( + schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar))); + + node_ops.push_back(std::make_shared(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, + std::move(schema), std::move(sampler_->Build()))); + return std::make_shared>>(node_ops); +} + +BatchDataset::BatchDataset(int32_t batch_size, bool drop_remainder, bool pad, std::vector cols_to_map, + std::map>> pad_map) + : batch_size_(batch_size), + drop_remainder_(drop_remainder), + pad_(pad), + cols_to_map_(cols_to_map), + pad_map_(pad_map) {} + +std::shared_ptr>> BatchDataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + +#ifdef ENABLE_PYTHON + py::function noop; + node_ops.push_back(std::make_shared(batch_size_, drop_remainder_, pad_, connector_que_size_, num_workers_, + cols_to_map_, noop, noop, pad_map_)); +#else + node_ops.push_back(std::make_shared(batch_size_, drop_remainder_, pad_, connector_que_size_, num_workers_, + cols_to_map_, pad_map_)); +#endif + return std::make_shared>>(node_ops); +} + +bool BatchDataset::ValidateParams() { + if (batch_size_ <= 0) { + return false; + } + + return true; +} + +RepeatDataset::RepeatDataset(uint32_t count) : repeat_count_(count) {} + +std::shared_ptr>> RepeatDataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + + node_ops.push_back(std::make_shared(repeat_count_)); + return std::make_shared>>(node_ops); +} + +bool RepeatDataset::ValidateParams() { + if (repeat_count_ <= 0) { + return false; + } + + return true; +} +MapDataset::MapDataset(std::vector> operations, std::vector input_columns, + std::vector 
output_columns, const std::vector &project_columns) + : operations_(operations), + input_columns_(input_columns), + output_columns_(output_columns), + project_columns_(project_columns) {} + +std::shared_ptr>> MapDataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + + // Currently default is true, and this is not exposed to user. + bool perf_mode = true; + + std::vector> tensor_ops; + + // Build tensorOp from tensorOperation vector + // This is to ensure each iterator hold its own copy of the tensorOp objects. + (void)std::transform( + operations_.begin(), operations_.end(), std::back_inserter(tensor_ops), + [](std::shared_ptr operation) -> std::shared_ptr { return operation->Build(); }); + + // This parameter will be removed with next rebase + std::vector col_orders; + auto map_op = + std::make_shared(input_columns_, output_columns_, tensor_ops, num_workers_, connector_que_size_, perf_mode); + if (!project_columns_.empty()) { + auto project_op = std::make_shared(project_columns_); + node_ops.push_back(project_op); + } + + node_ops.push_back(map_op); + return std::make_shared>>(node_ops); +} + +bool MapDataset::ValidateParams() { + if (operations_.empty()) { + return false; + } + + return true; +} + +// Constructor for ShuffleDataset +ShuffleDataset::ShuffleDataset(int32_t shuffle_size, bool reset_every_epoch) + : shuffle_size_(shuffle_size), shuffle_seed_(GetSeed()), reset_every_epoch_(reset_every_epoch) {} + +// Function to build the ShuffleOp +std::shared_ptr>> ShuffleDataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + + node_ops.push_back(std::make_shared(shuffle_size_, shuffle_seed_, connector_que_size_, reset_every_epoch_, + rows_per_buffer_)); + return std::make_shared>>(node_ops); +} + +// Function to validate the parameters for ShuffleDataset +bool ShuffleDataset::ValidateParams() { + if 
(shuffle_size_ <= 1) { + MS_LOG(ERROR) << "ShuffleDataset: Invalid input, shuffle_size: " << shuffle_size_; + return false; + } + + return true; +} + +// Constructor for Cifar10Dataset +Cifar10Dataset::Cifar10Dataset(const std::string &dataset_dir, int32_t num_samples, std::shared_ptr sampler) + : dataset_dir_(dataset_dir), num_samples_(num_samples), sampler_(sampler) {} + +bool Cifar10Dataset::ValidateParams() { + if (dataset_dir_.empty()) { + MS_LOG(ERROR) << "No dataset path is specified."; + return false; + } + if (num_samples_ < 0) { + MS_LOG(ERROR) << "Number of samples cannot be negative"; + return false; + } + return true; +} + +// Function to build CifarOp +std::shared_ptr>> Cifar10Dataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + + // If user does not specify Sampler, create a default sampler based on the shuffle variable. + if (sampler_ == nullptr) { + sampler_ = CreateDefaultSampler(); + } + + // Do internal Schema generation. 
+ auto schema = std::make_unique(); + RETURN_NULL_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1))); + TensorShape scalar = TensorShape::CreateScalar(); + RETURN_NULL_IF_ERROR( + schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar))); + + node_ops.push_back(std::make_shared(CifarOp::CifarType::kCifar10, num_workers_, rows_per_buffer_, + dataset_dir_, connector_que_size_, std::move(schema), + std::move(sampler_->Build()))); + return std::make_shared>>(node_ops); +} + +// Function to build ProjectOp +ProjectDataset::ProjectDataset(const std::vector &columns) : columns_(columns) {} + +bool ProjectDataset::ValidateParams() { + if (columns_.empty()) { + MS_LOG(ERROR) << "No columns are specified."; + return false; + } + return true; +} + +std::shared_ptr>> ProjectDataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + + node_ops.push_back(std::make_shared(columns_)); + return std::make_shared>>(node_ops); +} + +} // namespace api +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/api/de_pipeline.cc b/mindspore/ccsrc/minddata/dataset/api/de_pipeline.cc similarity index 86% rename from mindspore/ccsrc/dataset/api/de_pipeline.cc rename to mindspore/ccsrc/minddata/dataset/api/de_pipeline.cc index 78fcdb7dd4d..2a6166f868c 100644 --- a/mindspore/ccsrc/dataset/api/de_pipeline.cc +++ b/mindspore/ccsrc/minddata/dataset/api/de_pipeline.cc @@ -13,34 +13,37 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/api/de_pipeline.h" +#include "minddata/dataset/api/de_pipeline.h" #include #include #include #include "common/utils.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/datasetops/bucket_batch_by_length_op.h" -#include "dataset/engine/datasetops/filter_op.h" -#include "dataset/engine/datasetops/source/celeba_op.h" -#include "dataset/engine/datasetops/source/cifar_op.h" -#include "dataset/engine/datasetops/source/clue_op.h" -#include "dataset/engine/datasetops/source/coco_op.h" -#include "dataset/engine/datasetops/source/image_folder_op.h" -#include "dataset/engine/datasetops/source/manifest_op.h" -#include "dataset/engine/datasetops/source/mnist_op.h" -#include "dataset/engine/datasetops/source/random_data_op.h" -#include "dataset/engine/datasetops/source/text_file_op.h" -#include "dataset/engine/datasetops/source/voc_op.h" -#include "dataset/kernels/py_func_op.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" -#include "mindrecord/include/shard_category.h" -#include "mindrecord/include/shard_distributed_sample.h" -#include "mindrecord/include/shard_sample.h" -#include "mindrecord/include/shard_shuffle.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/cache/cache_client.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/engine/datasetops/bucket_batch_by_length_op.h" +#include "minddata/dataset/engine/datasetops/cache_op.h" +#include "minddata/dataset/engine/datasetops/filter_op.h" +#include "minddata/dataset/engine/datasetops/source/celeba_op.h" +#include "minddata/dataset/engine/datasetops/source/cifar_op.h" +#include "minddata/dataset/engine/datasetops/source/clue_op.h" +#include "minddata/dataset/engine/datasetops/source/coco_op.h" +#include "minddata/dataset/engine/datasetops/source/image_folder_op.h" +#include "minddata/dataset/engine/datasetops/source/manifest_op.h" +#include 
"minddata/dataset/engine/datasetops/source/mnist_op.h" +#include "minddata/dataset/engine/datasetops/source/random_data_op.h" +#include "minddata/dataset/engine/datasetops/source/text_file_op.h" +#include "minddata/dataset/engine/datasetops/source/voc_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/kernels/py_func_op.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" +#include "minddata/mindrecord/include/shard_category.h" +#include "minddata/mindrecord/include/shard_distributed_sample.h" +#include "minddata/mindrecord/include/shard_sample.h" +#include "minddata/mindrecord/include/shard_shuffle.h" #include "pybind11/stl.h" #include "utils/log_adapter.h" @@ -441,6 +444,8 @@ Status DEPipeline::ParseMapOp(const py::dict &args, std::shared_ptr * MapOp::Builder map_builder; std::vector> tensor_op_list; std::vector project_columns; + std::shared_ptr cache_client = nullptr; + int num_workers = 0; if (args["operations"].is_none()) RETURN_STATUS_UNEXPECTED("Error: 'operations' is not set. 
\n"); @@ -456,7 +461,8 @@ Status DEPipeline::ParseMapOp(const py::dict &args, std::shared_ptr * } else if (key == "columns_order") { project_columns = ToStringVector(value); } else if (key == "num_parallel_workers") { - (void)map_builder.SetNumWorkers(ToInt(value)); + num_workers = ToInt(value); + (void)map_builder.SetNumWorkers(num_workers); } else if (key == "prefetch_size") { (void)map_builder.SetOpConnectorSize(ToInt(value)); } else if (key == "operations") { @@ -477,6 +483,8 @@ Status DEPipeline::ParseMapOp(const py::dict &args, std::shared_ptr * } if (tensor_op_list.empty()) RETURN_STATUS_UNEXPECTED("Error: tensor_op is invalid or not set."); (void)map_builder.SetTensorFuncs(std::move(tensor_op_list)); + } else if (key == "cache") { + cache_client = value.cast>(); } else { RETURN_STATUS_UNEXPECTED("Error: Unhandled key: " + key); } @@ -499,6 +507,15 @@ Status DEPipeline::ParseMapOp(const py::dict &args, std::shared_ptr * *bottom = map_op; } + // Additionally, add a cache if required. 
This will go over top of the project op if one + // was created, otherwise it goes over top of the map op + if (cache_client) { + std::shared_ptr cache_op = nullptr; + RETURN_IF_NOT_OK(AddCacheOp(cache_client, num_workers, *top, &cache_op)); + *top = cache_op; + *bottom = map_op; + } + return Status::OK(); } @@ -809,6 +826,9 @@ Status DEPipeline::ParseTFReaderOp(const py::dict &args, std::shared_ptr *bottom) { // Required arguments std::vector files_list; + std::shared_ptr cache_client = nullptr; + std::shared_ptr sampler = nullptr; + int num_workers = 0; std::shared_ptr builder = std::make_shared(); if (!args["dataset_files"].is_none()) { files_list = ToStringVector(args["dataset_files"]); @@ -828,7 +848,8 @@ Status DEPipeline::ParseTFReaderOp(const py::dict &args, std::shared_ptrSetNumWorkers(ToInt(value)); + num_workers = ToInt(value); + (void)builder->SetNumWorkers(num_workers); } else if (key == "columns_list") { columns_to_load = ToStringVector(value); (void)builder->SetColumnsToLoad(columns_to_load); @@ -848,6 +869,11 @@ Status DEPipeline::ParseTFReaderOp(const py::dict &args, std::shared_ptrSetDeviceId(ToInt(value)); } else if (key == "shard_equal_rows") { (void)builder->SetShardEqualRows(ToBool(value)); + } else if (key == "cache") { + cache_client = value.cast>(); + } else if (key == "sampler") { + auto create = py::reinterpret_borrow(value).attr("create"); + sampler = create().cast>(); } } } @@ -860,12 +886,27 @@ Status DEPipeline::ParseTFReaderOp(const py::dict &args, std::shared_ptrSetDataSchema(std::move(schema)); } + + // If the user gave a sampler, but they did not ask for a cache, then by itself this is not allowed + // because TFReaderOp is a non-mappable dataset that does not support sampling. + // However, if a cache operator is injected at some other place higher in the tree, that cache can + // inherit this sampler from the leaf, providing sampling support from the caching layer. 
+ // That is why we save the sampler here in a leaf node that does not use sampling. + if (sampler) { + (void)builder->SetSampler(std::move(sampler)); + } else if (cache_client) { + int64_t num_samples = 0; + int64_t start_index = 0; + sampler = std::make_shared(num_samples, start_index); + (void)builder->SetSampler(std::move(sampler)); + } + std::shared_ptr tf_op; RETURN_IF_NOT_OK(builder->Build(&tf_op)); RETURN_IF_NOT_OK(tree_->AssociateNode(tf_op)); *top = tf_op; - if (shuffle_required) { + if (!cache_client && shuffle_required) { const boolean estimate = true; const int64_t workers = 8; std::shared_ptr shuffle_op = nullptr; @@ -882,6 +923,15 @@ Status DEPipeline::ParseTFReaderOp(const py::dict &args, std::shared_ptr cache_op = nullptr; + RETURN_IF_NOT_OK(AddCacheOp(cache_client, num_workers, tf_op, &cache_op)); + *top = cache_op; + *bottom = tf_op; + } + return Status::OK(); } @@ -906,6 +956,8 @@ Status DEPipeline::ParseImageFolderOp(const py::dict &args, std::shared_ptr cache_client = nullptr; std::shared_ptr builder = std::make_shared(); (void)builder->SetImageFolderDir(ToString(args["dataset_dir"])); @@ -915,7 +967,8 @@ Status DEPipeline::ParseImageFolderOp(const py::dict &args, std::shared_ptrSetNumWorkers(ToInt(value)); + num_workers = ToInt(value); + (void)builder->SetNumWorkers(num_workers); } else if (key == "sampler") { auto create = py::reinterpret_borrow(value).attr("create"); std::shared_ptr sampler = create().cast>(); @@ -926,12 +979,27 @@ Status DEPipeline::ParseImageFolderOp(const py::dict &args, std::shared_ptrSetClassIndex(ToStringMap(value)); } else if (key == "decode") { (void)builder->SetDecode(ToBool(value)); + } else if (key == "cache") { + cache_client = value.cast>(); } } } - std::shared_ptr op; - RETURN_IF_NOT_OK(builder->Build(&op)); - *top = op; + std::shared_ptr if_op; + RETURN_IF_NOT_OK(builder->Build(&if_op)); + RETURN_IF_NOT_OK(tree_->AssociateNode(if_op)); + *top = if_op; + + // Additionally, add a cache if required. 
+ // Note that this cache op is only acting as a place holder for the caching position + // within the tree. Later, a pre-pass will execute a tree transform to set up the actual + // caching logic in the tree. + if (cache_client) { + std::shared_ptr cache_op = nullptr; + RETURN_IF_NOT_OK(AddCacheOp(cache_client, num_workers, if_op, &cache_op)); + *top = cache_op; + *bottom = if_op; + } + return Status::OK(); } @@ -1130,9 +1198,12 @@ Status DEPipeline::ParseRandomDataOp(const py::dict &args, std::shared_ptr *bottom) { // Required arguments RandomDataOp::Builder builder; + std::shared_ptr cache_client = nullptr; + std::shared_ptr sampler = nullptr; + int num_workers = 0; - if (args["num_samples"].is_none()) { - std::string err_msg = "Error: num_samples is a required argument"; + if (args["total_rows"].is_none()) { + std::string err_msg = "Error: total_rows is a required argument"; RETURN_STATUS_UNEXPECTED(err_msg); } std::vector columns_to_load; @@ -1141,16 +1212,23 @@ Status DEPipeline::ParseRandomDataOp(const py::dict &args, std::shared_ptr>(); + } else if (key == "sampler") { + auto create = py::reinterpret_borrow(value).attr("create"); + sampler = create().cast>(); + } } } if (schema_exists) { @@ -1162,9 +1240,34 @@ Status DEPipeline::ParseRandomDataOp(const py::dict &args, std::shared_ptr op; - RETURN_IF_NOT_OK(builder.Build(&op)); - *top = op; + + // If the user gave a sampler, but they did not ask for a cache, then by itself this is not allowed + // because RandomDataOp is a non-mappable dataset that does not support sampling. + // However, if a cache operator is injected at some other place higher in the tree, that cache can + // inherit this sampler from the leaf, providing sampling support from the caching layer. + // That is why we save the sampler here in a leaf node that does not use sampling. 
+ if (sampler) { + (void)builder.SetSampler(std::move(sampler)); + } else if (cache_client) { + int64_t num_samples = 0; + int64_t start_index = 0; + sampler = std::make_shared(num_samples, start_index); + (void)builder.SetSampler(std::move(sampler)); + } + + std::shared_ptr random_op = nullptr; + RETURN_IF_NOT_OK(builder.Build(&random_op)); + RETURN_IF_NOT_OK(tree_->AssociateNode(random_op)); + *top = random_op; + + // Add a cache op over this op if required and update the output subtree (top/bottom) + if (cache_client) { + std::shared_ptr cache_op = nullptr; + RETURN_IF_NOT_OK(AddCacheOp(cache_client, num_workers, random_op, &cache_op)); + *top = cache_op; + *bottom = random_op; + } + return Status::OK(); } @@ -1425,6 +1528,31 @@ Status DEPipeline::ParseClueOp(const py::dict &args, std::shared_ptr return Status::OK(); } +// Helper function to inject the cache operator over top of the current operation being built. +Status DEPipeline::AddCacheOp(std::shared_ptr cache_client, int num_workers, + std::shared_ptr input_op, std::shared_ptr *cache_op) { + std::shared_ptr new_cache_op = nullptr; + CacheOp::Builder cache_builder; + // use the same number of workers as the leaf. We need some optimization here, the user does not + // give the cache op number of workers directly. + if (num_workers != 0) { + (void)cache_builder.SetNumWorkers(num_workers); + } + (void)cache_builder.SetClient(cache_client); + RETURN_IF_NOT_OK(cache_builder.Build(&new_cache_op)); + RETURN_IF_NOT_OK(tree_->AssociateNode(new_cache_op)); + RETURN_IF_NOT_OK(new_cache_op->AddChild(input_op)); + // We have now created: + // + // CacheOp + // | + // input_op + // + *cache_op = new_cache_op; + + return Status::OK(); +} + // Helper function to inject a shuffle operator over top of the current operation being built. 
Status DEPipeline::AddShuffleOp(int64_t shuffle_size, std::shared_ptr input_op, std::shared_ptr *shuffle_op) { diff --git a/mindspore/ccsrc/dataset/api/de_pipeline.h b/mindspore/ccsrc/minddata/dataset/api/de_pipeline.h similarity index 90% rename from mindspore/ccsrc/dataset/api/de_pipeline.h rename to mindspore/ccsrc/minddata/dataset/api/de_pipeline.h index 7cfc73307c9..755e827ef2e 100644 --- a/mindspore/ccsrc/dataset/api/de_pipeline.h +++ b/mindspore/ccsrc/minddata/dataset/api/de_pipeline.h @@ -23,9 +23,9 @@ #include #include #include -#include "dataset/core/client.h" // DE client -#include "dataset/engine/dataset_iterator.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/client.h" // DE client +#include "minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/util/status.h" #include "pybind11/numpy.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" @@ -35,6 +35,8 @@ namespace mindspore { namespace dataset { using DsOpPtr = std::shared_ptr; +class CacheClient; + // enum for the dataset operator names enum OpName { kShuffle, @@ -181,6 +183,16 @@ class DEPipeline { static Status ParsePadInfo(py::handle value, PadInfo *pad_info); + /// \brief Helper function to inject a cache operator over top of the current operation being built. + /// \param[in] cache_client The client to use for caching + /// \param[in] num_workers The number of workers to use in the cache op + /// \param[in] input_op The operator to build the cache on top of + /// \param[out] cache_op The top node of the created subtree (subtree contains two nodes). In this case it will be + /// the cache operator + /// \return Status return code + Status AddCacheOp(std::shared_ptr cache_client, int num_workers, std::shared_ptr input_op, + std::shared_ptr *cache_op); + /// \brief Helper function to inject a shuffle operator over top of the current operation being built. 
/// \param[in] shuffle_size The size to use in the shuffle buffer /// \param[in] input_op The operator to build shuffle on top of diff --git a/mindspore/ccsrc/minddata/dataset/api/iterator.cc b/mindspore/ccsrc/minddata/dataset/api/iterator.cc new file mode 100644 index 00000000000..068bcfaa047 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/api/iterator.cc @@ -0,0 +1,101 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "minddata/dataset/include/iterator.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/include/datasets.h" + +namespace mindspore { +namespace dataset { +namespace api { + +// Get the next row from the data pipeline. +void Iterator::GetNextRow(TensorMap *row) { + Status rc = iterator_->GetNextAsMap(row); + if (rc.IsError()) { + MS_LOG(ERROR) << "GetNextRow: Failed to get next row."; + row->clear(); + } +} + +// Shut down the data pipeline. +void Iterator::Stop() { + // Releasing the iterator_ unique_ptre. This should trigger the destructor of iterator_. + iterator_.reset(); + + // Release ownership of tree_ shared pointer. This will decrement the ref count. + tree_.reset(); +} + +// Function to build and launch the execution tree. 
+Status Iterator::BuildAndLaunchTree(std::shared_ptr ds) { + // One time init + Status rc; + rc = GlobalInit(); + RETURN_IF_NOT_OK(rc); + + // Instantiate the execution tree + tree_ = std::make_shared(); + + // Iterative BFS converting Dataset tree into runtime Execution tree. + std::queue, std::shared_ptr>> q; + + if (ds != nullptr) { + // Convert the current root node. + auto root_op = ds->Build()->front(); + RETURN_UNEXPECTED_IF_NULL(root_op); + + RETURN_IF_NOT_OK(tree_->AssociateNode(root_op)); + + q.push(std::make_pair(ds, root_op)); + + // Traverse down to the children and convert them to the corresponding DatasetOps (i.e. execution tree nodes) + while (!q.empty()) { + auto node_pair = q.front(); + q.pop(); + // Iterate through all the direct children of the first element in our BFS queue + for (auto child : node_pair.first->children) { + auto child_ops = child->Build(); + RETURN_UNEXPECTED_IF_NULL(child_ops); + auto node_op = node_pair.second; + // Iterate through all the DatasetOps returned by calling Build on the last Dataset object, associate them + // with the execution tree and add the child and parent relationship between the nodes + // Note that some Dataset objects might return more than one DatasetOps + // e.g. MapDataset will return MapOp and ProjectOp if project_columns is set for MapDataset + for (auto child_op : *child_ops) { + RETURN_IF_NOT_OK(tree_->AssociateNode(child_op)); + RETURN_IF_NOT_OK(node_op->AddChild(child_op)); + node_op = child_op; + } + // Add the child and the last element of the returned DatasetOps (which is now the leaf node in our current + // execution tree) to the BFS queue + q.push(std::make_pair(child, child_ops->back())); + } + } + RETURN_IF_NOT_OK(tree_->AssignRoot(root_op)); + } + + // Launch the execution tree. 
+ RETURN_IF_NOT_OK(tree_->Prepare()); + RETURN_IF_NOT_OK(tree_->Launch()); + iterator_ = std::make_unique(tree_); + RETURN_UNEXPECTED_IF_NULL(iterator_); + + return rc; +} + +} // namespace api +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/api/python_bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python_bindings.cc similarity index 83% rename from mindspore/ccsrc/dataset/api/python_bindings.cc rename to mindspore/ccsrc/minddata/dataset/api/python_bindings.cc index ed3f993fb86..145291ec3be 100644 --- a/mindspore/ccsrc/dataset/api/python_bindings.cc +++ b/mindspore/ccsrc/minddata/dataset/api/python_bindings.cc @@ -15,91 +15,92 @@ */ #include -#include "dataset/api/de_pipeline.h" -#include "dataset/engine/datasetops/source/cifar_op.h" -#include "dataset/engine/datasetops/source/clue_op.h" -#include "dataset/engine/datasetops/source/coco_op.h" -#include "dataset/engine/datasetops/source/image_folder_op.h" -#include "dataset/engine/datasetops/source/io_block.h" -#include "dataset/engine/datasetops/source/manifest_op.h" -#include "dataset/engine/datasetops/source/mindrecord_op.h" -#include "dataset/engine/datasetops/source/mnist_op.h" -#include "dataset/engine/datasetops/source/random_data_op.h" -#include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" -#include "dataset/engine/datasetops/source/sampler/pk_sampler.h" -#include "dataset/engine/datasetops/source/sampler/python_sampler.h" -#include "dataset/engine/datasetops/source/sampler/random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" -#include "dataset/engine/datasetops/source/text_file_op.h" -#include "dataset/engine/datasetops/source/tf_reader_op.h" -#include "dataset/engine/datasetops/source/voc_op.h" -#include "dataset/engine/gnn/graph.h" -#include 
"dataset/engine/jagged_connector.h" -#include "dataset/kernels/data/concatenate_op.h" -#include "dataset/kernels/data/duplicate_op.h" -#include "dataset/kernels/data/fill_op.h" -#include "dataset/kernels/data/mask_op.h" -#include "dataset/kernels/data/one_hot_op.h" -#include "dataset/kernels/data/pad_end_op.h" -#include "dataset/kernels/data/slice_op.h" -#include "dataset/kernels/data/to_float16_op.h" -#include "dataset/kernels/data/type_cast_op.h" -#include "dataset/kernels/image/bounding_box_augment_op.h" -#include "dataset/kernels/image/center_crop_op.h" -#include "dataset/kernels/image/cut_out_op.h" -#include "dataset/kernels/image/decode_op.h" -#include "dataset/kernels/image/hwc_to_chw_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/kernels/image/normalize_op.h" -#include "dataset/kernels/image/pad_op.h" -#include "dataset/kernels/image/random_color_adjust_op.h" -#include "dataset/kernels/image/random_crop_and_resize_op.h" -#include "dataset/kernels/image/random_crop_and_resize_with_bbox_op.h" -#include "dataset/kernels/image/random_crop_decode_resize_op.h" -#include "dataset/kernels/image/random_crop_op.h" -#include "dataset/kernels/image/random_crop_with_bbox_op.h" -#include "dataset/kernels/image/random_horizontal_flip_bbox_op.h" -#include "dataset/kernels/image/random_horizontal_flip_op.h" -#include "dataset/kernels/image/random_resize_op.h" -#include "dataset/kernels/image/random_resize_with_bbox_op.h" -#include "dataset/kernels/image/random_rotation_op.h" -#include "dataset/kernels/image/random_vertical_flip_op.h" -#include "dataset/kernels/image/random_vertical_flip_with_bbox_op.h" -#include "dataset/kernels/image/rescale_op.h" -#include "dataset/kernels/image/resize_bilinear_op.h" -#include "dataset/kernels/image/resize_op.h" -#include "dataset/kernels/image/resize_with_bbox_op.h" -#include "dataset/kernels/image/uniform_aug_op.h" -#include "dataset/kernels/no_op.h" -#include "dataset/text/kernels/jieba_tokenizer_op.h" 
-#include "dataset/text/kernels/lookup_op.h" -#include "dataset/text/kernels/ngram_op.h" -#include "dataset/text/kernels/to_number_op.h" -#include "dataset/text/kernels/unicode_char_tokenizer_op.h" -#include "dataset/text/kernels/wordpiece_tokenizer_op.h" -#include "dataset/text/vocab.h" -#include "dataset/util/random.h" -#include "mindrecord/include/shard_distributed_sample.h" -#include "mindrecord/include/shard_operator.h" -#include "mindrecord/include/shard_pk_sample.h" -#include "mindrecord/include/shard_sample.h" -#include "mindrecord/include/shard_sequential_sample.h" -#include "mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.h" +#include "minddata/dataset/api/de_pipeline.h" +#include "minddata/dataset/engine/datasetops/source/cifar_op.h" +#include "minddata/dataset/engine/datasetops/source/clue_op.h" +#include "minddata/dataset/engine/datasetops/source/coco_op.h" +#include "minddata/dataset/engine/datasetops/source/image_folder_op.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/engine/datasetops/source/manifest_op.h" +#include "minddata/dataset/engine/datasetops/source/mindrecord_op.h" +#include "minddata/dataset/engine/datasetops/source/mnist_op.h" +#include "minddata/dataset/engine/datasetops/source/random_data_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/python_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/text_file_op.h" +#include 
"minddata/dataset/engine/datasetops/source/tf_reader_op.h" +#include "minddata/dataset/engine/datasetops/source/voc_op.h" +#include "minddata/dataset/engine/cache/cache_client.h" +#include "minddata/dataset/engine/gnn/graph.h" +#include "minddata/dataset/engine/jagged_connector.h" +#include "minddata/dataset/kernels/data/concatenate_op.h" +#include "minddata/dataset/kernels/data/duplicate_op.h" +#include "minddata/dataset/kernels/data/fill_op.h" +#include "minddata/dataset/kernels/data/mask_op.h" +#include "minddata/dataset/kernels/data/one_hot_op.h" +#include "minddata/dataset/kernels/data/pad_end_op.h" +#include "minddata/dataset/kernels/data/slice_op.h" +#include "minddata/dataset/kernels/data/to_float16_op.h" +#include "minddata/dataset/kernels/data/type_cast_op.h" +#include "minddata/dataset/kernels/image/bounding_box_augment_op.h" +#include "minddata/dataset/kernels/image/center_crop_op.h" +#include "minddata/dataset/kernels/image/cut_out_op.h" +#include "minddata/dataset/kernels/image/decode_op.h" +#include "minddata/dataset/kernels/image/hwc_to_chw_op.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/kernels/image/normalize_op.h" +#include "minddata/dataset/kernels/image/pad_op.h" +#include "minddata/dataset/kernels/image/random_color_adjust_op.h" +#include "minddata/dataset/kernels/image/random_crop_and_resize_op.h" +#include "minddata/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h" +#include "minddata/dataset/kernels/image/random_crop_decode_resize_op.h" +#include "minddata/dataset/kernels/image/random_crop_op.h" +#include "minddata/dataset/kernels/image/random_crop_with_bbox_op.h" +#include "minddata/dataset/kernels/image/random_horizontal_flip_with_bbox_op.h" +#include "minddata/dataset/kernels/image/random_horizontal_flip_op.h" +#include "minddata/dataset/kernels/image/random_resize_op.h" +#include "minddata/dataset/kernels/image/random_resize_with_bbox_op.h" +#include 
"minddata/dataset/kernels/image/random_rotation_op.h" +#include "minddata/dataset/kernels/image/random_vertical_flip_op.h" +#include "minddata/dataset/kernels/image/random_vertical_flip_with_bbox_op.h" +#include "minddata/dataset/kernels/image/rescale_op.h" +#include "minddata/dataset/kernels/image/resize_bilinear_op.h" +#include "minddata/dataset/kernels/image/resize_op.h" +#include "minddata/dataset/kernels/image/resize_with_bbox_op.h" +#include "minddata/dataset/kernels/image/uniform_aug_op.h" +#include "minddata/dataset/kernels/no_op.h" +#include "minddata/dataset/text/kernels/jieba_tokenizer_op.h" +#include "minddata/dataset/text/kernels/lookup_op.h" +#include "minddata/dataset/text/kernels/ngram_op.h" +#include "minddata/dataset/text/kernels/to_number_op.h" +#include "minddata/dataset/text/kernels/unicode_char_tokenizer_op.h" +#include "minddata/dataset/text/kernels/wordpiece_tokenizer_op.h" +#include "minddata/dataset/text/vocab.h" +#include "minddata/dataset/util/random.h" +#include "minddata/mindrecord/include/shard_distributed_sample.h" +#include "minddata/mindrecord/include/shard_operator.h" +#include "minddata/mindrecord/include/shard_pk_sample.h" +#include "minddata/mindrecord/include/shard_sample.h" +#include "minddata/mindrecord/include/shard_sequential_sample.h" +#include "mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" #include "pybind11/stl_bind.h" #ifdef ENABLE_ICU4C -#include "dataset/text/kernels/basic_tokenizer_op.h" -#include "dataset/text/kernels/bert_tokenizer_op.h" -#include "dataset/text/kernels/case_fold_op.h" -#include "dataset/text/kernels/normalize_utf8_op.h" -#include "dataset/text/kernels/regex_replace_op.h" -#include "dataset/text/kernels/regex_tokenizer_op.h" -#include "dataset/text/kernels/unicode_script_tokenizer_op.h" -#include "dataset/text/kernels/whitespace_tokenizer_op.h" +#include "minddata/dataset/text/kernels/basic_tokenizer_op.h" 
+#include "minddata/dataset/text/kernels/bert_tokenizer_op.h" +#include "minddata/dataset/text/kernels/case_fold_op.h" +#include "minddata/dataset/text/kernels/normalize_utf8_op.h" +#include "minddata/dataset/text/kernels/regex_replace_op.h" +#include "minddata/dataset/text/kernels/regex_tokenizer_op.h" +#include "minddata/dataset/text/kernels/unicode_script_tokenizer_op.h" +#include "minddata/dataset/text/kernels/whitespace_tokenizer_op.h" #endif namespace py = pybind11; @@ -297,7 +298,7 @@ void bindTensor(py::module *m) { })) .def_buffer([](Tensor &tensor) { py::buffer_info info; - THROW_IF_ERROR(Tensor::GetBufferInfo(tensor, &info)); + THROW_IF_ERROR(Tensor::GetBufferInfo(&tensor, &info)); return info; }) .def("__str__", &Tensor::ToString) @@ -311,7 +312,7 @@ void bindTensor(py::module *m) { return res; } py::buffer_info info; - THROW_IF_ERROR(Tensor::GetBufferInfo(tensor, &info)); + THROW_IF_ERROR(Tensor::GetBufferInfo(&tensor, &info)); return py::array(pybind11::dtype(info), info.shape, info.strides, info.ptr, t); }); @@ -601,39 +602,57 @@ void bindTensorOps4(py::module *m) { void bindTokenizerOps(py::module *m) { (void)py::class_>(*m, "JiebaTokenizerOp", "") - .def(py::init(), py::arg("hmm_path"), py::arg("mp_path"), - py::arg("mode") = JiebaMode::kMix) + .def(py::init(), py::arg("hmm_path"), + py::arg("mp_path"), py::arg("mode") = JiebaMode::kMix, + py::arg("with_offsets") = JiebaTokenizerOp::kDefWithOffsets) .def("add_word", [](JiebaTokenizerOp &self, const std::string word, int freq) { THROW_IF_ERROR(self.AddWord(word, freq)); }); (void)py::class_>( *m, "UnicodeCharTokenizerOp", "Tokenize a scalar tensor of UTF-8 string to Unicode characters.") - .def(py::init<>()); + .def(py::init(), py::arg("with_offsets") = UnicodeCharTokenizerOp::kDefWithOffsets); (void)py::class_>(*m, "LookupOp", - "Tensor operation to LookUp each word") - .def(py::init, WordIdType>(), py::arg("vocab"), py::arg("unknown")) - .def(py::init>(), py::arg("vocab")); - (void)py::class_>(*m, 
"NgramOp", "TensorOp performs ngram mapping") + "Tensor operation to LookUp each word.") + .def(py::init([](std::shared_ptr vocab, const py::object &py_word) { + if (vocab == nullptr) { + THROW_IF_ERROR(Status(StatusCode::kUnexpectedError, "vocab object type is incorrect or null.")); + } + if (py_word.is_none()) { + return std::make_shared(vocab, Vocab::kNoTokenExists); + } + std::string word = py::reinterpret_borrow(py_word); + WordIdType default_id = vocab->Lookup(word); + if (default_id == Vocab::kNoTokenExists) { + THROW_IF_ERROR( + Status(StatusCode::kUnexpectedError, "default unknown token:" + word + " doesn't exist in vocab.")); + } + return std::make_shared(vocab, default_id); + })); + (void)py::class_>(*m, "NgramOp", "TensorOp performs ngram mapping.") .def(py::init &, int32_t, int32_t, const std::string &, const std::string &, const std::string &>(), py::arg("ngrams"), py::arg("l_pad_len"), py::arg("r_pad_len"), py::arg("l_pad_token"), py::arg("r_pad_token"), py::arg("separator")); (void)py::class_>( *m, "WordpieceTokenizerOp", "Tokenize scalar token or 1-D tokens to subword tokens.") - .def(py::init &, const std::string &, const int &, const std::string &>(), - py::arg("vocab"), py::arg("suffix_indicator") = std::string(WordpieceTokenizerOp::kDefSuffixIndicator), - py::arg("max_bytes_per_token") = WordpieceTokenizerOp::kDefMaxBytesPerToken, - py::arg("unknown_token") = std::string(WordpieceTokenizerOp::kDefUnknownToken)); + .def( + py::init &, const std::string &, const int &, const std::string &, const bool &>(), + py::arg("vocab"), py::arg("suffix_indicator") = std::string(WordpieceTokenizerOp::kDefSuffixIndicator), + py::arg("max_bytes_per_token") = WordpieceTokenizerOp::kDefMaxBytesPerToken, + py::arg("unknown_token") = std::string(WordpieceTokenizerOp::kDefUnknownToken), + py::arg("with_offsets") = WordpieceTokenizerOp::kDefWithOffsets); } void bindDependIcuTokenizerOps(py::module *m) { #ifdef ENABLE_ICU4C (void)py::class_>( *m, 
"WhitespaceTokenizerOp", "Tokenize a scalar tensor of UTF-8 string on ICU defined whitespaces.") - .def(py::init<>()); + .def(py::init(), py::arg("with_offsets") = WhitespaceTokenizerOp::kDefWithOffsets); (void)py::class_>( *m, "UnicodeScriptTokenizerOp", "Tokenize a scalar tensor of UTF-8 string on Unicode script boundaries.") .def(py::init<>()) - .def(py::init(), py::arg("keep_whitespace") = UnicodeScriptTokenizerOp::kDefKeepWhitespace); + .def(py::init(), + py::arg("keep_whitespace") = UnicodeScriptTokenizerOp::kDefKeepWhitespace, + py::arg("with_offsets") = UnicodeScriptTokenizerOp::kDefWithOffsets); (void)py::class_>( *m, "CaseFoldOp", "Apply case fold operation on utf-8 string tensor") .def(py::init<>()); @@ -647,24 +666,28 @@ void bindDependIcuTokenizerOps(py::module *m) { py::arg("replace_all")); (void)py::class_>( *m, "RegexTokenizerOp", "Tokenize a scalar tensor of UTF-8 string by regex expression pattern.") - .def(py::init(), py::arg("delim_pattern"), py::arg("keep_delim_pattern")); + .def(py::init(), py::arg("delim_pattern"), + py::arg("keep_delim_pattern"), py::arg("with_offsets") = RegexTokenizerOp::kDefWithOffsets); (void)py::class_>( *m, "BasicTokenizerOp", "Tokenize a scalar tensor of UTF-8 string by specific rules.") - .def(py::init(), py::arg("lower_case") = BasicTokenizerOp::kDefLowerCase, + .def(py::init(), + py::arg("lower_case") = BasicTokenizerOp::kDefLowerCase, py::arg("keep_whitespace") = BasicTokenizerOp::kDefKeepWhitespace, py::arg("normalization_form") = BasicTokenizerOp::kDefNormalizationForm, - py::arg("preserve_unused_token") = BasicTokenizerOp::kDefPreserveUnusedToken); + py::arg("preserve_unused_token") = BasicTokenizerOp::kDefPreserveUnusedToken, + py::arg("with_offsets") = BasicTokenizerOp::kDefWithOffsets); (void)py::class_>(*m, "BertTokenizerOp", "Tokenizer used for Bert text process.") - .def(py::init &, const std::string &, const int &, const std::string &, bool, bool, - NormalizeForm, bool>(), + .def(py::init &, const 
std::string &, const int &, const std::string &, const bool &, + const bool &, const NormalizeForm &, const bool &, const bool &>(), py::arg("vocab"), py::arg("suffix_indicator") = std::string(WordpieceTokenizerOp::kDefSuffixIndicator), py::arg("max_bytes_per_token") = WordpieceTokenizerOp::kDefMaxBytesPerToken, py::arg("unknown_token") = std::string(WordpieceTokenizerOp::kDefUnknownToken), py::arg("lower_case") = BasicTokenizerOp::kDefLowerCase, py::arg("keep_whitespace") = BasicTokenizerOp::kDefKeepWhitespace, py::arg("normalization_form") = BasicTokenizerOp::kDefNormalizationForm, - py::arg("preserve_unused_token") = BasicTokenizerOp::kDefPreserveUnusedToken); + py::arg("preserve_unused_token") = BasicTokenizerOp::kDefPreserveUnusedToken, + py::arg("with_offsets") = WordpieceTokenizerOp::kDefWithOffsets); #endif } @@ -746,6 +769,11 @@ void bindInfoObjects(py::module *m) { .def("get_batch_num", &BatchOp::CBatchInfo::get_batch_num); } +void bindCacheClient(py::module *m) { + (void)py::class_>(*m, "CacheClient") + .def(py::init()); +} + void bindVocabObjects(py::module *m) { (void)py::class_>(*m, "Vocab") .def(py::init<>()) @@ -820,6 +848,12 @@ void bindGraphData(py::module *m) { THROW_IF_ERROR(g.GetNodeFeature(node_list, feature_types, &out)); return out.getRow(); }) + .def("get_edge_feature", + [](gnn::Graph &g, std::shared_ptr edge_list, std::vector feature_types) { + TensorRow out; + THROW_IF_ERROR(g.GetEdgeFeature(edge_list, feature_types, &out)); + return out.getRow(); + }) .def("graph_info", [](gnn::Graph &g) { py::dict out; @@ -911,6 +945,7 @@ PYBIND11_MODULE(_c_dataengine, m) { bindSamplerOps(&m); bindDatasetOps(&m); bindInfoObjects(&m); + bindCacheClient(&m); bindVocabObjects(&m); bindGraphData(&m); bindDependIcuTokenizerOps(&m); diff --git a/mindspore/ccsrc/minddata/dataset/api/samplers.cc b/mindspore/ccsrc/minddata/dataset/api/samplers.cc new file mode 100644 index 00000000000..91421f0ff86 --- /dev/null +++ 
b/mindspore/ccsrc/minddata/dataset/api/samplers.cc @@ -0,0 +1,224 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "minddata/dataset/include/samplers.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h" + +namespace mindspore { +namespace dataset { +namespace api { + +SamplerObj::SamplerObj() {} + +/// Function to create a Distributed Sampler. +std::shared_ptr DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle, + int64_t num_samples, uint32_t seed) { + auto sampler = std::make_shared(num_shards, shard_id, shuffle, num_samples, seed); + // Input validation + if (!sampler->ValidateParams()) { + return nullptr; + } + return sampler; +} + +/// Function to create a PK Sampler. 
+std::shared_ptr PKSampler(int64_t num_val, bool shuffle, int64_t num_samples) { + auto sampler = std::make_shared(num_val, shuffle, num_samples); + // Input validation + if (!sampler->ValidateParams()) { + return nullptr; + } + return sampler; +} + +/// Function to create a Random Sampler. +std::shared_ptr RandomSampler(bool replacement, int64_t num_samples) { + auto sampler = std::make_shared(replacement, num_samples); + // Input validation + if (!sampler->ValidateParams()) { + return nullptr; + } + return sampler; +} + +/// Function to create a Sequential Sampler. +std::shared_ptr SequentialSampler(int64_t start_index, int64_t num_samples) { + auto sampler = std::make_shared(start_index, num_samples); + // Input validation + if (!sampler->ValidateParams()) { + return nullptr; + } + return sampler; +} + +/// Function to create a Subset Random Sampler. +std::shared_ptr SubsetRandomSampler(const std::vector &indices, int64_t num_samples) { + auto sampler = std::make_shared(indices, num_samples); + // Input validation + if (!sampler->ValidateParams()) { + return nullptr; + } + return sampler; +} + +/// Function to create a Weighted Random Sampler. 
+std::shared_ptr WeightedRandomSampler(const std::vector &weights, int64_t num_samples, + bool replacement) { + auto sampler = std::make_shared(weights, num_samples, replacement); + // Input validation + if (!sampler->ValidateParams()) { + return nullptr; + } + return sampler; +} + +/* ####################################### Derived Sampler classes ################################# */ + +// DistributedSampler +DistributedSamplerObj::DistributedSamplerObj(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples, + uint32_t seed) + : num_shards_(num_shards), shard_id_(shard_id), shuffle_(shuffle), num_samples_(num_samples), seed_(seed) {} + +bool DistributedSamplerObj::ValidateParams() { + if (num_shards_ <= 0) { + MS_LOG(ERROR) << "DistributedSampler: invalid num_shards: " << num_shards_; + return false; + } + + if (shard_id_ < 0 || shard_id_ >= num_shards_) { + MS_LOG(ERROR) << "DistributedSampler: invalid input, shard_id: " << shard_id_ << ", num_shards: " << num_shards_; + return false; + } + + if (num_samples_ < 0) { + MS_LOG(ERROR) << "DistributedSampler: invalid num_samples: " << num_samples_; + return false; + } + + return true; +} + +std::shared_ptr DistributedSamplerObj::Build() { + return std::make_shared(num_samples_, num_shards_, shard_id_, shuffle_, seed_); +} + +// PKSampler +PKSamplerObj::PKSamplerObj(int64_t num_val, bool shuffle, int64_t num_samples) + : num_val_(num_val), shuffle_(shuffle), num_samples_(num_samples) {} + +bool PKSamplerObj::ValidateParams() { + if (num_val_ <= 0) { + MS_LOG(ERROR) << "PKSampler: invalid num_val: " << num_val_; + return false; + } + + if (num_samples_ < 0) { + MS_LOG(ERROR) << "PKSampler: invalid num_samples: " << num_samples_; + return false; + } + return true; +} + +std::shared_ptr PKSamplerObj::Build() { + return std::make_shared(num_samples_, num_val_, shuffle_); +} + +// RandomSampler +RandomSamplerObj::RandomSamplerObj(bool replacement, int64_t num_samples) + : replacement_(replacement), 
num_samples_(num_samples) {} + +bool RandomSamplerObj::ValidateParams() { + if (num_samples_ < 0) { + MS_LOG(ERROR) << "RandomSampler: invalid num_samples: " << num_samples_; + return false; + } + return true; +} + +std::shared_ptr RandomSamplerObj::Build() { + bool reshuffle_each_epoch = true; + auto sampler = std::make_shared(num_samples_, replacement_, reshuffle_each_epoch); + return sampler; +} + +// SequentialSampler +SequentialSamplerObj::SequentialSamplerObj(int64_t start_index, int64_t num_samples) + : start_index_(start_index), num_samples_(num_samples) {} + +bool SequentialSamplerObj::ValidateParams() { + if (num_samples_ < 0) { + MS_LOG(ERROR) << "SequentialSampler: invalid num_samples: " << num_samples_; + return false; + } + + if (start_index_ < 0) { + MS_LOG(ERROR) << "SequentialSampler: invalid start_index: " << start_index_; + return false; + } + + return true; +} + +std::shared_ptr SequentialSamplerObj::Build() { + auto sampler = std::make_shared(num_samples_, start_index_); + return sampler; +} + +// SubsetRandomSampler +SubsetRandomSamplerObj::SubsetRandomSamplerObj(const std::vector &indices, int64_t num_samples) + : indices_(indices), num_samples_(num_samples) {} + +bool SubsetRandomSamplerObj::ValidateParams() { + if (num_samples_ < 0) { + MS_LOG(ERROR) << "SubsetRandomSampler: invalid num_samples: " << num_samples_; + return false; + } + + return true; +} + +std::shared_ptr SubsetRandomSamplerObj::Build() { + auto sampler = std::make_shared(num_samples_, indices_); + return sampler; +} + +// WeightedRandomSampler +WeightedRandomSamplerObj::WeightedRandomSamplerObj(const std::vector &weights, int64_t num_samples, + bool replacement) + : weights_(weights), num_samples_(num_samples), replacement_(replacement) {} + +bool WeightedRandomSamplerObj::ValidateParams() { + if (num_samples_ < 0) { + MS_LOG(ERROR) << "WeightedRandomSampler: invalid num_samples: " << num_samples_; + return false; + } + return true; +} + +std::shared_ptr 
WeightedRandomSamplerObj::Build() { + auto sampler = std::make_shared(num_samples_, weights_, replacement_); + return sampler; +} + +} // namespace api +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/api/transforms.cc b/mindspore/ccsrc/minddata/dataset/api/transforms.cc new file mode 100644 index 00000000000..59a25ef9f54 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/api/transforms.cc @@ -0,0 +1,491 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "minddata/dataset/include/transforms.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/kernels/image/normalize_op.h" +#include "minddata/dataset/kernels/image/decode_op.h" +#include "minddata/dataset/kernels/image/resize_op.h" +#include "minddata/dataset/kernels/image/random_crop_op.h" +#include "minddata/dataset/kernels/image/center_crop_op.h" +#include "minddata/dataset/kernels/image/uniform_aug_op.h" +#include "minddata/dataset/kernels/image/random_horizontal_flip_op.h" +#include "minddata/dataset/kernels/image/random_vertical_flip_op.h" +#include "minddata/dataset/kernels/image/random_rotation_op.h" +#include "minddata/dataset/kernels/image/cut_out_op.h" +#include "minddata/dataset/kernels/image/random_color_adjust_op.h" +#include "minddata/dataset/kernels/image/pad_op.h" + +namespace mindspore { +namespace dataset { +namespace api { + +TensorOperation::TensorOperation() {} + +// Transform operations for computer vision. +namespace vision { + +// Function to create NormalizeOperation. +std::shared_ptr Normalize(std::vector mean, std::vector std) { + auto op = std::make_shared(mean, std); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create DecodeOperation. +std::shared_ptr Decode(bool rgb) { + auto op = std::make_shared(rgb); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create ResizeOperation. +std::shared_ptr Resize(std::vector size, InterpolationMode interpolation) { + auto op = std::make_shared(size, interpolation); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create RandomCropOperation. 
+std::shared_ptr RandomCrop(std::vector size, std::vector padding, + bool pad_if_needed, std::vector fill_value) { + auto op = std::make_shared(size, padding, pad_if_needed, fill_value); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create CenterCropOperation. +std::shared_ptr CenterCrop(std::vector size) { + auto op = std::make_shared(size); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create UniformAugOperation. +std::shared_ptr UniformAugment(std::vector> operations, + int32_t num_ops) { + auto op = std::make_shared(operations, num_ops); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create RandomHorizontalFlipOperation. +std::shared_ptr RandomHorizontalFlip(float prob) { + auto op = std::make_shared(prob); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create RandomVerticalFlipOperation. +std::shared_ptr RandomVerticalFlip(float prob) { + auto op = std::make_shared(prob); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create RandomRotationOperation. +std::shared_ptr RandomRotation(std::vector degrees, InterpolationMode resample, + bool expand, std::vector center, + std::vector fill_value) { + auto op = std::make_shared(degrees, resample, expand, center, fill_value); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create PadOperation. +std::shared_ptr Pad(std::vector padding, std::vector fill_value, + BorderType padding_mode) { + auto op = std::make_shared(padding, fill_value, padding_mode); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create CutOutOp. 
+std::shared_ptr CutOut(int32_t length, int32_t num_patches) { + auto op = std::make_shared(length, num_patches); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create RandomColorAdjustOperation. +std::shared_ptr RandomColorAdjust(std::vector brightness, + std::vector contrast, + std::vector saturation, std::vector hue) { + auto op = std::make_shared(brightness, contrast, saturation, hue); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +/* ####################################### Derived TensorOperation classes ################################# */ + +// NormalizeOperation +NormalizeOperation::NormalizeOperation(std::vector mean, std::vector std) : mean_(mean), std_(std) {} + +bool NormalizeOperation::ValidateParams() { + if (mean_.size() != 3) { + MS_LOG(ERROR) << "Normalize: mean vector has incorrect size: " << mean_.size(); + return false; + } + + if (std_.size() != 3) { + MS_LOG(ERROR) << "Normalize: std vector has incorrect size: " << std_.size(); + return false; + } + + return true; +} + +std::shared_ptr NormalizeOperation::Build() { + return std::make_shared(mean_[0], mean_[1], mean_[2], std_[0], std_[1], std_[2]); +} + +// DecodeOperation +DecodeOperation::DecodeOperation(bool rgb) : rgb_(rgb) {} + +bool DecodeOperation::ValidateParams() { return true; } + +std::shared_ptr DecodeOperation::Build() { return std::make_shared(rgb_); } + +// ResizeOperation +ResizeOperation::ResizeOperation(std::vector size, InterpolationMode interpolation) + : size_(size), interpolation_(interpolation) {} + +bool ResizeOperation::ValidateParams() { + if (size_.empty() || size_.size() > 2) { + MS_LOG(ERROR) << "Resize: size vector has incorrect size: " << size_.size(); + return false; + } + return true; +} + +std::shared_ptr ResizeOperation::Build() { + int32_t height = size_[0]; + int32_t width = 0; + + // User specified the width value. 
+ if (size_.size() == 2) { + width = size_[1]; + } + + return std::make_shared(height, width, interpolation_); +} + +// RandomCropOperation +RandomCropOperation::RandomCropOperation(std::vector size, std::vector padding, bool pad_if_needed, + std::vector fill_value) + : size_(size), padding_(padding), pad_if_needed_(pad_if_needed), fill_value_(fill_value) {} + +bool RandomCropOperation::ValidateParams() { + if (size_.empty() || size_.size() > 2) { + MS_LOG(ERROR) << "RandomCrop: size vector has incorrect size: " << size_.size(); + return false; + } + + if (padding_.empty() || padding_.size() != 4) { + MS_LOG(ERROR) << "RandomCrop: padding vector has incorrect size: padding.size()"; + return false; + } + + if (fill_value_.empty() || fill_value_.size() != 3) { + MS_LOG(ERROR) << "RandomCrop: fill_value vector has incorrect size: fill_value.size()"; + return false; + } + return true; +} + +std::shared_ptr RandomCropOperation::Build() { + int32_t crop_height = size_[0]; + int32_t crop_width = 0; + + int32_t pad_top = padding_[0]; + int32_t pad_bottom = padding_[1]; + int32_t pad_left = padding_[2]; + int32_t pad_right = padding_[3]; + + uint8_t fill_r = fill_value_[0]; + uint8_t fill_g = fill_value_[1]; + uint8_t fill_b = fill_value_[2]; + + // User has specified the crop_width value. + if (size_.size() == 2) { + crop_width = size_[1]; + } + + auto tensor_op = std::make_shared(crop_height, crop_width, pad_top, pad_bottom, pad_left, pad_right, + BorderType::kConstant, pad_if_needed_, fill_r, fill_g, fill_b); + return tensor_op; +} + +// CenterCropOperation +CenterCropOperation::CenterCropOperation(std::vector size) : size_(size) {} + +bool CenterCropOperation::ValidateParams() { + if (size_.empty() || size_.size() > 2) { + MS_LOG(ERROR) << "CenterCrop: size vector has incorrect size."; + return false; + } + return true; +} + +std::shared_ptr CenterCropOperation::Build() { + int32_t crop_height = size_[0]; + int32_t crop_width = 0; + + // User has specified crop_width. 
+ if (size_.size() == 2) { + crop_width = size_[1]; + } + + std::shared_ptr tensor_op = std::make_shared(crop_height, crop_width); + return tensor_op; +} + +// UniformAugOperation +UniformAugOperation::UniformAugOperation(std::vector> operations, int32_t num_ops) + : operations_(operations), num_ops_(num_ops) {} + +bool UniformAugOperation::ValidateParams() { return true; } + +std::shared_ptr UniformAugOperation::Build() { + std::vector> tensor_ops; + (void)std::transform(operations_.begin(), operations_.end(), std::back_inserter(tensor_ops), + [](std::shared_ptr op) -> std::shared_ptr { return op->Build(); }); + std::shared_ptr tensor_op = std::make_shared(tensor_ops, num_ops_); + return tensor_op; +} + +// RandomHorizontalFlipOperation +RandomHorizontalFlipOperation::RandomHorizontalFlipOperation(float probability) : probability_(probability) {} + +bool RandomHorizontalFlipOperation::ValidateParams() { return true; } + +std::shared_ptr RandomHorizontalFlipOperation::Build() { + std::shared_ptr tensor_op = std::make_shared(probability_); + return tensor_op; +} + +// RandomVerticalFlipOperation +RandomVerticalFlipOperation::RandomVerticalFlipOperation(float probability) : probability_(probability) {} + +bool RandomVerticalFlipOperation::ValidateParams() { return true; } + +std::shared_ptr RandomVerticalFlipOperation::Build() { + std::shared_ptr tensor_op = std::make_shared(probability_); + return tensor_op; +} + +// Function to create RandomRotationOperation. 
+RandomRotationOperation::RandomRotationOperation(std::vector degrees, InterpolationMode interpolation_mode, + bool expand, std::vector center, + std::vector fill_value) + : degrees_(degrees), + interpolation_mode_(interpolation_mode), + expand_(expand), + center_(center), + fill_value_(fill_value) {} + +bool RandomRotationOperation::ValidateParams() { + if (degrees_.empty() || degrees_.size() != 2) { + MS_LOG(ERROR) << "RandomRotation: degrees vector has incorrect size: degrees.size()"; + return false; + } + if (center_.empty() || center_.size() != 2) { + MS_LOG(ERROR) << "RandomRotation: center vector has incorrect size: center.size()"; + return false; + } + if (fill_value_.empty() || fill_value_.size() != 3) { + MS_LOG(ERROR) << "RandomRotation: fill_value vector has incorrect size: fill_value.size()"; + return false; + } + return true; +} + +std::shared_ptr RandomRotationOperation::Build() { + std::shared_ptr tensor_op = + std::make_shared(degrees_[0], degrees_[1], center_[0], center_[1], interpolation_mode_, expand_, + fill_value_[0], fill_value_[1], fill_value_[2]); + return tensor_op; +} + +// PadOperation +PadOperation::PadOperation(std::vector padding, std::vector fill_value, BorderType padding_mode) + : padding_(padding), fill_value_(fill_value), padding_mode_(padding_mode) {} + +bool PadOperation::ValidateParams() { + if (padding_.empty() || padding_.size() == 3 || padding_.size() > 4) { + MS_LOG(ERROR) << "Pad: padding vector has incorrect size: padding.size()"; + return false; + } + + if (fill_value_.empty() || (fill_value_.size() != 1 && fill_value_.size() != 3)) { + MS_LOG(ERROR) << "Pad: fill_value vector has incorrect size: fill_value.size()"; + return false; + } + return true; +} + +std::shared_ptr PadOperation::Build() { + int32_t pad_top, pad_bottom, pad_left, pad_right; + switch (padding_.size()) { + case 1: + pad_left = padding_[0]; + pad_top = padding_[0]; + pad_right = padding_[0]; + pad_bottom = padding_[0]; + break; + case 2: + pad_left = 
padding_[0]; + pad_top = padding_[1]; + pad_right = padding_[0]; + pad_bottom = padding_[1]; + break; + default: + pad_left = padding_[0]; + pad_top = padding_[1]; + pad_right = padding_[2]; + pad_bottom = padding_[3]; + } + uint8_t fill_r, fill_g, fill_b; + + fill_r = fill_value_[0]; + fill_g = fill_value_[0]; + fill_b = fill_value_[0]; + + if (fill_value_.size() == 3) { + fill_r = fill_value_[0]; + fill_g = fill_value_[1]; + fill_b = fill_value_[2]; + } + + std::shared_ptr tensor_op = + std::make_shared(pad_top, pad_bottom, pad_left, pad_right, padding_mode_, fill_r, fill_g, fill_b); + return tensor_op; +} + +// CutOutOperation +CutOutOperation::CutOutOperation(int32_t length, int32_t num_patches) : length_(length), num_patches_(num_patches) {} + +bool CutOutOperation::ValidateParams() { + if (length_ < 0) { + MS_LOG(ERROR) << "CutOut: length cannot be negative"; + return false; + } + if (num_patches_ < 0) { + MS_LOG(ERROR) << "CutOut: number of patches cannot be negative"; + return false; + } + return true; +} + +std::shared_ptr CutOutOperation::Build() { + std::shared_ptr tensor_op = std::make_shared(length_, length_, num_patches_, false, 0, 0, 0); + return tensor_op; +} + +// RandomColorAdjustOperation. +RandomColorAdjustOperation::RandomColorAdjustOperation(std::vector brightness, std::vector contrast, + std::vector saturation, std::vector hue) + : brightness_(brightness), contrast_(contrast), saturation_(saturation), hue_(hue) {} + +bool RandomColorAdjustOperation::ValidateParams() { + // Do some input validation. 
+ if (brightness_.empty() || brightness_.size() > 2) { + MS_LOG(ERROR) << "RandomColorAdjust: brightness must be a vector of one or two values"; + return false; + } + if (contrast_.empty() || contrast_.size() > 2) { + MS_LOG(ERROR) << "RandomColorAdjust: contrast must be a vector of one or two values"; + return false; + } + if (saturation_.empty() || saturation_.size() > 2) { + MS_LOG(ERROR) << "RandomColorAdjust: saturation must be a vector of one or two values"; + return false; + } + if (hue_.empty() || hue_.size() > 2) { + MS_LOG(ERROR) << "RandomColorAdjust: hue must be a vector of one or two values"; + return false; + } + return true; +} + +std::shared_ptr RandomColorAdjustOperation::Build() { + float brightness_lb, brightness_ub, contrast_lb, contrast_ub, saturation_lb, saturation_ub, hue_lb, hue_ub; + + brightness_lb = brightness_[0]; + brightness_ub = brightness_[0]; + + if (brightness_.size() == 2) brightness_ub = brightness_[1]; + + contrast_lb = contrast_[0]; + contrast_ub = contrast_[0]; + + if (contrast_.size() == 2) contrast_ub = contrast_[1]; + + saturation_lb = saturation_[0]; + saturation_ub = saturation_[0]; + + if (saturation_.size() == 2) saturation_ub = saturation_[1]; + + hue_lb = hue_[0]; + hue_ub = hue_[0]; + + if (hue_.size() == 2) hue_ub = hue_[1]; + + std::shared_ptr tensor_op = std::make_shared( + brightness_lb, brightness_ub, contrast_lb, contrast_ub, saturation_lb, saturation_ub, hue_lb, hue_ub); + return tensor_op; +} + +} // namespace vision +} // namespace api +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/core/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/core/CMakeLists.txt similarity index 70% rename from mindspore/ccsrc/dataset/core/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/core/CMakeLists.txt index 27b9f0e13b8..bfe6e675631 100644 --- a/mindspore/ccsrc/dataset/core/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/core/CMakeLists.txt @@ -1,10 +1,6 @@ 
-ms_protobuf_generate(EXAMPLE_SRCS EXAMPLE_HDRS example.proto) -ms_protobuf_generate(FEATURE_SRCS FEATURE_HDRS feature.proto) file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) -add_library(core OBJECT - ${EXAMPLE_SRCS} - ${FEATURE_SRCS} +set(DATASET_CORE_SRC_FILES client.cc config_manager.cc cv_tensor.cc @@ -13,6 +9,13 @@ add_library(core OBJECT tensor.cc tensor_row.cc tensor_shape.cc - ) +) + +ms_protobuf_generate(EXAMPLE_SRCS EXAMPLE_HDRS example.proto) +ms_protobuf_generate(FEATURE_SRCS FEATURE_HDRS feature.proto) +add_library(core OBJECT ${DATASET_CORE_SRC_FILES} ${EXAMPLE_SRCS} ${FEATURE_SRCS}) add_dependencies(core mindspore::protobuf) -target_include_directories(core PRIVATE ${pybind11_INCLUDE_DIRS}) + +if (ENABLE_PYTHON) + target_include_directories(core PRIVATE ${pybind11_INCLUDE_DIRS}) +endif() diff --git a/mindspore/ccsrc/dataset/core/client.cc b/mindspore/ccsrc/minddata/dataset/core/client.cc similarity index 80% rename from mindspore/ccsrc/dataset/core/client.cc rename to mindspore/ccsrc/minddata/dataset/core/client.cc index 6247ddae7d5..e3fd844e66f 100644 --- a/mindspore/ccsrc/dataset/core/client.cc +++ b/mindspore/ccsrc/minddata/dataset/core/client.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/core/client.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/global_context.h" -#include "dataset/util/services.h" -#include "dataset/util/sig_handler.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/sig_handler.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/minddata/dataset/core/client.h b/mindspore/ccsrc/minddata/dataset/core/client.h new file mode 100644 index 00000000000..78b298e616a --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/core/client.h @@ -0,0 +1,61 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef DATASET_CORE_CLIENT_H_ +#define DATASET_CORE_CLIENT_H_ + +// client.h +// Include file for DE client functions + +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/engine/datasetops/source/mindrecord_op.h" +#include "minddata/dataset/engine/datasetops/source/tf_reader_op.h" + +#ifdef ENABLE_PYTHON +#include "minddata/dataset/engine/datasetops/barrier_op.h" +#include "minddata/dataset/engine/datasetops/filter_op.h" +#include "minddata/dataset/engine/datasetops/source/generator_op.h" +#include "minddata/dataset/engine/datasetops/build_vocab_op.h" +#endif + +#include "minddata/dataset/engine/datasetops/batch_op.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/engine/datasetops/device_queue_op.h" +#include "minddata/dataset/engine/datasetops/map_op.h" +#include "minddata/dataset/engine/datasetops/project_op.h" +#include "minddata/dataset/engine/datasetops/rename_op.h" +#include "minddata/dataset/engine/datasetops/repeat_op.h" +#include "minddata/dataset/engine/datasetops/skip_op.h" +#include "minddata/dataset/engine/datasetops/shuffle_op.h" +#include "minddata/dataset/engine/datasetops/take_op.h" +#include "minddata/dataset/engine/datasetops/zip_op.h" +#include "minddata/dataset/engine/datasetops/concat_op.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/util/status.h" + +namespace mindspore { +namespace dataset { +// This is a one-time global initializer that needs to be called at the +// start of any minddata applications. 
+extern Status GlobalInit(); +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_CORE_CLIENT_H_ diff --git a/mindspore/ccsrc/dataset/core/config_manager.cc b/mindspore/ccsrc/minddata/dataset/core/config_manager.cc similarity index 97% rename from mindspore/ccsrc/dataset/core/config_manager.cc rename to mindspore/ccsrc/minddata/dataset/core/config_manager.cc index 9291a8f832d..e1fc7f29ba7 100644 --- a/mindspore/ccsrc/dataset/core/config_manager.cc +++ b/mindspore/ccsrc/minddata/dataset/core/config_manager.cc @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/core/config_manager.h" +#include "minddata/dataset/core/config_manager.h" #include #include #include -#include "dataset/util/system_pool.h" +#include "minddata/dataset/util/system_pool.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/core/config_manager.h b/mindspore/ccsrc/minddata/dataset/core/config_manager.h similarity index 97% rename from mindspore/ccsrc/dataset/core/config_manager.h rename to mindspore/ccsrc/minddata/dataset/core/config_manager.h index 807591daa16..a8e1907c416 100644 --- a/mindspore/ccsrc/dataset/core/config_manager.h +++ b/mindspore/ccsrc/minddata/dataset/core/config_manager.h @@ -22,9 +22,9 @@ #include -#include "dataset/core/constants.h" -#include "dataset/util/path.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/status.h" // Config settings for the client-side // example config file: diff --git a/mindspore/ccsrc/dataset/core/constants.h b/mindspore/ccsrc/minddata/dataset/core/constants.h similarity index 89% rename from mindspore/ccsrc/dataset/core/constants.h rename to mindspore/ccsrc/minddata/dataset/core/constants.h index 34d2f2583c1..c85ef52bf5f 100644 --- a/mindspore/ccsrc/dataset/core/constants.h +++ 
b/mindspore/ccsrc/minddata/dataset/core/constants.h @@ -32,6 +32,12 @@ enum class DatasetType { kUnknown, kArrow, kTf }; // Possible flavours of Tensor implementations enum class TensorImpl { kNone, kFlexible, kCv, kNP }; +// Possible values for Border types +enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 }; + +// Possible interpolation modes +enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3 }; + // convenience functions for 32bit int bitmask inline bool BitTest(uint32_t bits, uint32_t bitMask) { return (bits & bitMask) == bitMask; } diff --git a/mindspore/ccsrc/dataset/core/cv_tensor.cc b/mindspore/ccsrc/minddata/dataset/core/cv_tensor.cc similarity index 96% rename from mindspore/ccsrc/dataset/core/cv_tensor.cc rename to mindspore/ccsrc/minddata/dataset/core/cv_tensor.cc index 16921e8b2df..5af748b5de4 100644 --- a/mindspore/ccsrc/dataset/core/cv_tensor.cc +++ b/mindspore/ccsrc/minddata/dataset/core/cv_tensor.cc @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/core/cv_tensor.h" +#include "minddata/dataset/core/cv_tensor.h" #include #include -#include "dataset/core/constants.h" -#include "dataset/core/tensor.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/tensor.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/core/cv_tensor.h b/mindspore/ccsrc/minddata/dataset/core/cv_tensor.h similarity index 96% rename from mindspore/ccsrc/dataset/core/cv_tensor.h rename to mindspore/ccsrc/minddata/dataset/core/cv_tensor.h index 8c136f5f3cf..a614418be66 100644 --- a/mindspore/ccsrc/dataset/core/cv_tensor.h +++ b/mindspore/ccsrc/minddata/dataset/core/cv_tensor.h @@ -24,9 +24,9 @@ #include "./securec.h" -#include "dataset/core/constants.h" -#include "dataset/core/data_type.h" -#include "dataset/core/tensor.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/core/data_type.cc b/mindspore/ccsrc/minddata/dataset/core/data_type.cc similarity index 96% rename from mindspore/ccsrc/dataset/core/data_type.cc rename to mindspore/ccsrc/minddata/dataset/core/data_type.cc index bb10fae52f5..b5641e3105c 100644 --- a/mindspore/ccsrc/dataset/core/data_type.cc +++ b/mindspore/ccsrc/minddata/dataset/core/data_type.cc @@ -13,12 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/core/data_type.h" +#include "minddata/dataset/core/data_type.h" +#ifdef ENABLE_PYTHON +#include "minddata/dataset/core/pybind_support.h" +#endif #include "utils/log_adapter.h" -#include "dataset/core/pybind_support.h" - namespace mindspore { namespace dataset { @@ -29,12 +30,14 @@ uint8_t DataType::SizeInBytes() const { return 0; } +#ifdef ENABLE_PYTHON py::dtype DataType::AsNumpyType() const { if (type_ < DataType::NUM_OF_TYPES) return py::dtype(kTypeInfo[type_].pybindType_); else return py::dtype("unknown"); } +#endif uint8_t DataType::AsCVType() const { uint8_t res = kCVInvalidType; @@ -112,6 +115,7 @@ std::string DataType::ToString() const { return "unknown"; } +#ifdef ENABLE_PYTHON DataType DataType::FromNpArray(const py::array &arr) { if (py::isinstance>(arr)) { return DataType(DataType::DE_BOOL); @@ -156,6 +160,7 @@ std::string DataType::GetPybindFormat() const { } return res; } +#endif } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/core/data_type.h b/mindspore/ccsrc/minddata/dataset/core/data_type.h similarity index 83% rename from mindspore/ccsrc/dataset/core/data_type.h rename to mindspore/ccsrc/minddata/dataset/core/data_type.h index a487f3300e7..db4834cae2f 100644 --- a/mindspore/ccsrc/dataset/core/data_type.h +++ b/mindspore/ccsrc/minddata/dataset/core/data_type.h @@ -19,14 +19,16 @@ #include #include - +#ifdef ENABLE_PYTHON #include "pybind11/numpy.h" #include "pybind11/pybind11.h" - -#include "dataset/core/constants.h" -#include "dataset/core/pybind_support.h" - +#include "minddata/dataset/core/pybind_support.h" namespace py = pybind11; +#else +#include "Eigen/Core" +using float16 = Eigen::half; +#endif +#include "minddata/dataset/core/constants.h" namespace mindspore { namespace dataset { @@ -59,6 +61,7 @@ class DataType { const uint8_t cvType_; // OpenCv matching type }; +#ifdef ENABLE_PYTHON static inline const TypeInfo kTypeInfo[] = { // name, sizeInBytes, pybindTypem formatDescriptor, 
openCV {"unknown", 0, "object", "", kCVInvalidType}, // DE_UNKNOWN @@ -76,19 +79,38 @@ class DataType { {"float64", 8, "double", py::format_descriptor::format(), CV_64F}, // DE_FLOAT64 {"string", 0, "bytes", "S", kCVInvalidType} // DE_STRING }; +#else + static inline const TypeInfo kTypeInfo[] = { + // name, sizeInBytes, pybindTypem formatDescriptor, openCV + {"unknown", 0, "object", "", kCVInvalidType}, // DE_UNKNOWN + {"bool", 1, "bool", "", CV_8U}, // DE_BOOL + {"int8", 1, "int8", "", CV_8S}, // DE_INT8 + {"uint8", 1, "uint8", "", CV_8U}, // DE_UINT8 + {"int16", 2, "int16", "", CV_16S}, // DE_INT16 + {"uint16", 2, "uint16", "", CV_16U}, // DE_UINT16 + {"int32", 4, "int32", "", CV_32S}, // DE_INT32 + {"uint32", 4, "uint32", "", kCVInvalidType}, // DE_UINT32 + {"int64", 8, "int64", "", kCVInvalidType}, // DE_INT64 + {"uint64", 8, "uint64", "", kCVInvalidType}, // DE_UINT64 + {"float16", 2, "float16", "", CV_16F}, // DE_FLOAT16 + {"float32", 4, "float32", "", CV_32F}, // DE_FLOAT32 + {"float64", 8, "double", "", CV_64F}, // DE_FLOAT64 + {"string", 0, "bytes", "", kCVInvalidType} // DE_STRING + }; +#endif // No arg constructor to create an unknown shape DataType() : type_(DE_UNKNOWN) {} // Create a type from a given string - // @param type_str + /// \param type_str explicit DataType(const std::string &type_str); // Default destructor ~DataType() = default; // Create a type from a given enum - // @param d + /// \param d constexpr explicit DataType(Type d) : type_(d) {} constexpr bool operator==(const DataType a) const { return type_ == a.type_; } @@ -100,49 +122,49 @@ class DataType { constexpr bool operator!=(const Type a) const { return type_ != a; } // Disable this usage `if(d)` where d is of type DataType - // @return + /// \return operator bool() = delete; // To be used in Switch/case - // @return + /// \return operator Type() const { return type_; } // The number of bytes needed to store one value of this type - // @return + /// \return uint8_t SizeInBytes() 
const; // Convert from DataType to OpenCV type - // @return + /// \return uint8_t AsCVType() const; // Convert from OpenCV type to DataType - // @param cv_type - // @return + /// \param cv_type + /// \return static DataType FromCVType(int cv_type); // Returns a string representation of the type - // @return + /// \return std::string ToString() const; // returns true if the template type is the same as the Tensor type_ - // @tparam T - // @return true or false + /// \tparam T + /// \return true or false template bool IsCompatible() const { return type_ == FromCType(); } // returns true if the template type is the same as the Tensor type_ - // @tparam T - // @return true or false + /// \tparam T + /// \return true or false template bool IsLooselyCompatible() const; // << Stream output operator overload - // @notes This allows you to print the info using stream operators - // @param out - reference to the output stream being overloaded - // @param rO - reference to the DataType to display - // @return - the output stream must be returned + /// \notes This allows you to print the info using stream operators + /// \param out - reference to the output stream being overloaded + /// \param rO - reference to the DataType to display + /// \return - the output stream must be returned friend std::ostream &operator<<(std::ostream &out, const DataType &so) { out << so.ToString(); return out; @@ -151,22 +173,24 @@ class DataType { template static DataType FromCType(); +#ifdef ENABLE_PYTHON // Convert from DataType to Pybind type - // @return + /// \return py::dtype AsNumpyType() const; // Convert from NP type to DataType - // @param type - // @return + /// \param type + /// \return static DataType FromNpType(const py::dtype &type); // Convert from NP array to DataType - // @param py array - // @return + /// \param py array + /// \return static DataType FromNpArray(const py::array &arr); +#endif // Get the buffer string format of the current type. Used in pybind buffer protocol. 
- // @return + /// \return std::string GetPybindFormat() const; bool IsSignedInt() const { diff --git a/mindspore/ccsrc/dataset/core/example.proto b/mindspore/ccsrc/minddata/dataset/core/example.proto similarity index 100% rename from mindspore/ccsrc/dataset/core/example.proto rename to mindspore/ccsrc/minddata/dataset/core/example.proto diff --git a/mindspore/ccsrc/dataset/core/feature.proto b/mindspore/ccsrc/minddata/dataset/core/feature.proto similarity index 100% rename from mindspore/ccsrc/dataset/core/feature.proto rename to mindspore/ccsrc/minddata/dataset/core/feature.proto diff --git a/mindspore/ccsrc/dataset/core/global_context.cc b/mindspore/ccsrc/minddata/dataset/core/global_context.cc similarity index 86% rename from mindspore/ccsrc/dataset/core/global_context.cc rename to mindspore/ccsrc/minddata/dataset/core/global_context.cc index 3de8e0fcd8d..eb76382ab2c 100644 --- a/mindspore/ccsrc/dataset/core/global_context.cc +++ b/mindspore/ccsrc/minddata/dataset/core/global_context.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/core/global_context.h" +#include "minddata/dataset/core/global_context.h" #include #include -#include "dataset/core/config_manager.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/core/tensor.h" -#include "dataset/util/allocator.h" -#include "dataset/util/circular_pool.h" -#include "dataset/util/system_pool.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/circular_pool.h" +#include "minddata/dataset/util/system_pool.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/core/global_context.h b/mindspore/ccsrc/minddata/dataset/core/global_context.h similarity index 96% rename from mindspore/ccsrc/dataset/core/global_context.h rename to mindspore/ccsrc/minddata/dataset/core/global_context.h index ee0cbfbbe0e..fe0847f6391 100644 --- a/mindspore/ccsrc/dataset/core/global_context.h +++ b/mindspore/ccsrc/minddata/dataset/core/global_context.h @@ -19,9 +19,9 @@ #include #include -#include "dataset/core/constants.h" -#include "dataset/util/allocator.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/core/pybind_support.h b/mindspore/ccsrc/minddata/dataset/core/pybind_support.h similarity index 100% rename from mindspore/ccsrc/dataset/core/pybind_support.h rename to mindspore/ccsrc/minddata/dataset/core/pybind_support.h diff --git a/mindspore/ccsrc/dataset/core/tensor.cc b/mindspore/ccsrc/minddata/dataset/core/tensor.cc similarity index 96% rename from mindspore/ccsrc/dataset/core/tensor.cc rename to mindspore/ccsrc/minddata/dataset/core/tensor.cc index 8de3425c5b7..842615f9e18 100644 --- a/mindspore/ccsrc/dataset/core/tensor.cc +++ 
b/mindspore/ccsrc/minddata/dataset/core/tensor.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/core/tensor.h" +#include "minddata/dataset/core/tensor.h" #include #include @@ -25,13 +25,15 @@ #include #include "common/utils.h" -#include "dataset/core/constants.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/core/global_context.h" -#include "dataset/core/pybind_support.h" -#include "dataset/core/tensor_shape.h" - +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/core/global_context.h" +#ifdef ENABLE_PYTHON +#include "minddata/dataset/core/pybind_support.h" namespace py = pybind11; +#endif +#include "minddata/dataset/core/tensor_shape.h" + namespace mindspore { namespace dataset { // Helper macros for printing tensor elements @@ -155,6 +157,7 @@ Tensor::Tensor(const std::vector &strings, const TensorShape &shape MS_ASSERT(num_bytes == 0); if (shape.known()) Tensor::Reshape(shape); } + Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape) : Tensor(TensorShape({static_cast(bytes_list.value_size())}), DataType(DataType::DE_STRING)) { // total bytes needed = offset array + strings @@ -194,6 +197,7 @@ Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape MS_ASSERT(num_bytes == 0); if (shape.known()) Tensor::Reshape(shape); } + Status Tensor::CreateTensor(std::shared_ptr *ptr, TensorImpl tensor_impl, const TensorShape &shape, DataType type, const unsigned char *data) { if (!shape.known()) { @@ -223,6 +227,7 @@ Status Tensor::CreateTensor(std::shared_ptr *ptr, TensorImpl tensor_impl return Status::OK(); // returns base-class shared_ptr } +#ifdef ENABLE_PYTHON Status Tensor::CreateTensorFromNumpyString(std::shared_ptr *ptr, py::array arr) { std::vector shape; for (dsize_t i = 0; i < arr.ndim(); i++) { @@ -297,6 +302,7 @@ Status 
Tensor::CreateTensor(std::shared_ptr *ptr, py::array arr) { return Status::OK(); // returns base-class shared_ptr } +#endif Status Tensor::CreateTensor(std::shared_ptr *ptr, const std::vector &strings, const TensorShape &shape) { @@ -513,6 +519,15 @@ const unsigned char *Tensor::GetBuffer() const { return data_; } +// check for empty +bool Tensor::HasData() const { + if (data_ == nullptr) { + return true; + } else { + return false; + } +} + unsigned char *Tensor::GetMutableBuffer() { if (!shape_.known() || type_ == DataType::DE_UNKNOWN) { return nullptr; @@ -689,21 +704,24 @@ std::vector Tensor::Strides() { return strides; } -Status Tensor::GetBufferInfo(Tensor &t, py::buffer_info *out) { - CHECK_FAIL_RETURN_UNEXPECTED(t.type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings."); +#ifdef ENABLE_PYTHON +Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) { + RETURN_UNEXPECTED_IF_NULL(t); + CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings."); - std::string format_desc = t.type().GetPybindFormat(); + std::string format_desc = t->type().GetPybindFormat(); if (format_desc.empty()) { RETURN_STATUS_UNEXPECTED("Cannot convert DE type tp pybind format"); } - *out = py::buffer_info(t.GetMutableBuffer(), /* Pointer to buffer */ - t.type().SizeInBytes(), /* Size of one scalar */ - format_desc, /* Python struct-style format descriptor */ - t.Rank(), /* Number of dimensions */ - t.shape().AsVector(), /* Buffer dimensions */ - t.Strides()); + *out = py::buffer_info(t->GetMutableBuffer(), /* Pointer to buffer */ + t->type().SizeInBytes(), /* Size of one scalar */ + format_desc, /* Python struct-style format descriptor */ + t->Rank(), /* Number of dimensions */ + t->shape().AsVector(), /* Buffer dimensions */ + t->Strides()); return Status::OK(); } +#endif template Status Tensor::GetItemAt(T *o, const std::vector &index) const { @@ -743,6 +761,8 @@ Status Tensor::GetItemAt(std::string_view *o, const std::vector 
&index) o->swap(sv); return Status::OK(); } + +#ifdef ENABLE_PYTHON // return data as numpy, should return status Status Tensor::GetDataAsNumpy(py::array *data) { RETURN_UNEXPECTED_IF_NULL(data_); @@ -806,6 +826,7 @@ Status Tensor::GetDataAsNumpyStrings(py::array *data) { data_allocator_->deallocate(reinterpret_cast(tmp_data)); return Status::OK(); } +#endif void Tensor::Squeeze() { shape_ = shape_.Squeeze(); } diff --git a/mindspore/ccsrc/dataset/core/tensor.h b/mindspore/ccsrc/minddata/dataset/core/tensor.h similarity index 96% rename from mindspore/ccsrc/dataset/core/tensor.h rename to mindspore/ccsrc/minddata/dataset/core/tensor.h index 9fed0bbc975..b0b173e9c37 100644 --- a/mindspore/ccsrc/dataset/core/tensor.h +++ b/mindspore/ccsrc/minddata/dataset/core/tensor.h @@ -26,20 +26,27 @@ #undef HAVE_STDDEF_H #undef HAVE_STDLIB_H #endif + +#ifdef ENABLE_PYTHON #include "pybind11/numpy.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" -#include "dataset/core/constants.h" -#include "dataset/core/data_type.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/util/allocator.h" -#include "dataset/util/status.h" +#endif + +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/util/status.h" #include "proto/example.pb.h" +#ifdef ENABLE_PYTHON namespace py = pybind11; +#endif namespace mindspore { namespace dataset { class Tensor; +template +class Allocator; using CharAllocPtr = std::unique_ptr>; using TensorAllocPtr = std::shared_ptr>; // An allocator shared_ptr for Tensors @@ -114,16 +121,17 @@ class Tensor { static Status CreateTensor(std::shared_ptr *, TensorImpl tensor_impl, const TensorShape &shape, DataType type, const unsigned char *data = nullptr); - /// Create a copy of the input tensor - /// \param out [out] output tensor to be generated - /// \param in [in] orginal tensor to be copied - /// \return Status + // Create a copy of the 
input tensor + // @param out [out] output tensor to be generated + // @param in [in] orginal tensor to be copied + // @return Status static Status CreateTensor(std::shared_ptr *out, const std::shared_ptr &in) { const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator(); *out = std::allocate_shared(*alloc, in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes()); return Status::OK(); } +#ifdef ENABLE_PYTHON // A static factory method to create a Tensor from a given py::array. // @param ptr output argument to hold the created Tensor // @param arr py::array @@ -132,6 +140,7 @@ class Tensor { // Helper function to create a tensor from Numpy of strings static Status CreateTensorFromNumpyString(std::shared_ptr *ptr, py::array arr); +#endif // A static factory method to create a Tensor from a given list of strings. // @param ptr output argument to hold the created Tensor @@ -170,6 +179,7 @@ class Tensor { static Status CreateTensor(std::shared_ptr *ptr, const T &item) { return CreateTensor(ptr, {item}, TensorShape::CreateScalar()); } + // Create tensor from protobuf bytelist with uint8 or int8 types static Status CreateTensor(std::shared_ptr *ptr, const dataengine::BytesList &bytes_list, const TensorShape &shape, const DataType &type, dsize_t pad_size); @@ -277,6 +287,10 @@ class Tensor { // @return const TensorShape &shape() const { return shape_; } + /// Check if tensor has data + /// \return bool - true if tensor is empty + bool HasData() const; + // Reshape the tensor. The given shape should have the same number of elements in the Tensor // @param shape virtual Status Reshape(const TensorShape &shape); @@ -342,12 +356,12 @@ class Tensor { virtual void Squeeze(); - /// Calculates the strides of the Tensor - /// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte) - /// The strides will be {6,2,1}. - /// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte) - /// The strides will be {24,8,4}. 
- /// @return vector of integers + // Calculates the strides of the Tensor + // Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte) + // The strides will be {6,2,1}. + // Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte) + // The strides will be {24,8,4}. + // @return vector of integers std::vector Strides(); std::string ToString() { @@ -372,6 +386,7 @@ class Tensor { // Slice string tensors Status SliceString(std::shared_ptr *out, const std::vector &indices); +#ifdef ENABLE_PYTHON // Constructs numpy array from input tensor // @param data this data is the location of python data // @return Status code @@ -379,7 +394,8 @@ class Tensor { Status GetDataAsNumpyStrings(py::array *data); - static Status GetBufferInfo(Tensor &t, py::buffer_info *out); + static Status GetBufferInfo(Tensor *t, py::buffer_info *out); +#endif // Concatenate based on given tensor, can fill in current tensor with a smaller one, unlike InsertTensor Status Concatenate(const std::vector &index, const std::shared_ptr &input); @@ -566,7 +582,7 @@ class Tensor { // Return a TensorIterator that points to the start of the Tensor. 
// It's the user responsibility to use the correct type that matches the Tensor type - // @tparam T The type of values in the Tensor + // @param T The type of values in the Tensor // @return TensorIterator template TensorIterator begin() { diff --git a/mindspore/ccsrc/dataset/core/tensor_row.cc b/mindspore/ccsrc/minddata/dataset/core/tensor_row.cc similarity index 97% rename from mindspore/ccsrc/dataset/core/tensor_row.cc rename to mindspore/ccsrc/minddata/dataset/core/tensor_row.cc index 882f6728bfb..5d75730a4ce 100644 --- a/mindspore/ccsrc/dataset/core/tensor_row.cc +++ b/mindspore/ccsrc/minddata/dataset/core/tensor_row.cc @@ -16,9 +16,8 @@ #include -#include "dataset/core/tensor_row.h" +#include "minddata/dataset/core/tensor_row.h" -namespace py = pybind11; namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/core/tensor_row.h b/mindspore/ccsrc/minddata/dataset/core/tensor_row.h similarity index 98% rename from mindspore/ccsrc/dataset/core/tensor_row.h rename to mindspore/ccsrc/minddata/dataset/core/tensor_row.h index 49bc61657c4..e8f066c87bb 100644 --- a/mindspore/ccsrc/dataset/core/tensor_row.h +++ b/mindspore/ccsrc/minddata/dataset/core/tensor_row.h @@ -21,7 +21,7 @@ #include #include -#include "dataset/core/tensor.h" +#include "minddata/dataset/core/tensor.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/core/tensor_shape.cc b/mindspore/ccsrc/minddata/dataset/core/tensor_shape.cc similarity index 97% rename from mindspore/ccsrc/dataset/core/tensor_shape.cc rename to mindspore/ccsrc/minddata/dataset/core/tensor_shape.cc index a0d6b9cd8d7..ff40062d377 100644 --- a/mindspore/ccsrc/dataset/core/tensor_shape.cc +++ b/mindspore/ccsrc/minddata/dataset/core/tensor_shape.cc @@ -15,13 +15,13 @@ */ #define MAX_INTEGER_DTYPE 9223372036854775807 -#include "dataset/core/tensor_shape.h" +#include "minddata/dataset/core/tensor_shape.h" #include #include "common/utils.h" #include "utils/log_adapter.h" -#include 
"dataset/core/constants.h" +#include "minddata/dataset/core/constants.h" namespace mindspore { namespace dataset { @@ -77,6 +77,7 @@ TensorShape::TensorShape(const TensorShape &shape) known_ = shape.known_; // override with the input shape in case of unknown-rank tensor shape. } +#ifdef ENABLE_PYTHON TensorShape::TensorShape(py::list l) : raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) { std::vector list_c; @@ -89,6 +90,7 @@ TensorShape::TensorShape(py::list l) } AddListToShape(list_c); } +#endif TensorShape::TensorShape(cv::MatSize cv_size, uint32_t type) : raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) { @@ -197,6 +199,7 @@ TensorShape TensorShape::AppendDim(dsize_t dim) const { return TensorShape(vec); } +#ifdef ENABLE_PYTHON py::list TensorShape::AsPyList() { py::list list; for (auto i : raw_shape_) { @@ -204,6 +207,7 @@ py::list TensorShape::AsPyList() { } return list; } +#endif TensorShape TensorShape::Squeeze() const { std::vector new_shape; diff --git a/mindspore/ccsrc/dataset/core/tensor_shape.h b/mindspore/ccsrc/minddata/dataset/core/tensor_shape.h similarity index 57% rename from mindspore/ccsrc/dataset/core/tensor_shape.h rename to mindspore/ccsrc/minddata/dataset/core/tensor_shape.h index c83e43cd7dc..4944f9e32c2 100644 --- a/mindspore/ccsrc/dataset/core/tensor_shape.h +++ b/mindspore/ccsrc/minddata/dataset/core/tensor_shape.h @@ -24,13 +24,16 @@ #include +#ifdef ENABLE_PYTHON #include "pybind11/pybind11.h" - -#include "dataset/core/constants.h" -#include "dataset/core/global_context.h" -#include "dataset/util/allocator.h" - namespace py = pybind11; +#endif + +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/util/allocator.h" + namespace mindspore { namespace dataset { // Class that represents a shape of a 
Tensor. A shape can be: @@ -43,7 +46,8 @@ namespace dataset { // -# one or more dim is unknown --> not empty vector --> where di is unknown\n // Example: <3,?> (the 1st dim is unknown)\n // <2,?,?,?> (all dims but the 0th dim are unknown) -// TensorShape supports any dim > 0 and < 2^31-1 + +/// \brief TensorShape supports any dim > 0 and < 2^31-1 class TensorShape { public: static constexpr dsize_t kDimUnknown = -1; // constant for an unknown dimension @@ -51,57 +55,59 @@ class TensorShape { // Force the compiler to not create a no-arg constructor TensorShape() = delete; - // Create a Shape from an initialization list (e.g., TensorShape s = {2,2}). - // If one of the dims is set to DIM_UNKNOWN, the shape will flagged as unKnown - // @param list + /// \brief Create a Shape from an initialization list (e.g., TensorShape s = {2,2}). + /// If one of the dims is set to DIM_UNKNOWN, the shape will flagged as unKnown + /// \param[in] list explicit TensorShape(const std::initializer_list &list); - // Create a Shape from a vector (e.g., TensorShape s = std::vector({2,2}) ). - // If one of the dims is set to DIM_UNKNOWN, the shape will flagged as unKnown - // @param list + /// \brief Create a Shape from a vector (e.g., TensorShape s = std::vector({2,2}) ). 
+ /// If one of the dims is set to DIM_UNKNOWN, the shape will flagged as unKnown + /// \param[in] list explicit TensorShape(const std::vector &list); - // Copy constructor - // @param shape + /// \brief Copy constructor + /// \param[in] shape TensorShape(const TensorShape &shape); - // construct a TensorShape via a python list - // @param py::list l - a list object from python +#ifdef ENABLE_PYTHON + /// \brief construct a TensorShape via a python list + /// \param[in] py::list l - a list object from python explicit TensorShape(py::list l); +#endif ~TensorShape() = default; - // Create a scalar Shape (i.e., empty shape with mKnown = true) - // @return TensorShape + /// \brief Create a scalar Shape (i.e., empty shape with mKnown = true) + /// \return TensorShape static TensorShape CreateScalar() { return TensorShape({}); } - // Create a shape with an unknown rank. - // @return TensorShape + /// \brief Create a shape with an unknown rank. + /// \return TensorShape static TensorShape CreateUnknownRankShape(); - // Create a shape with a known rank . - // @return TensorShape + /// \brief Create a shape with a known rank . + /// \return TensorShape static TensorShape CreateUnknownShapeWithRank(dsize_t rank); - // Insert a new dim into a copy of the current shape. - // @param dim to be added - // @param axis the index where dim should be added - // @return New modified shape + /// \brief Insert a new dim into a copy of the current shape. + /// \param[in] dim to be added + /// \param[in] axis the index where dim should be added + /// \return New modified shape TensorShape InsertDim(dsize_t axis, dsize_t dim) const; - // Insert new dim at index 0. For example, <2,4> --> PrependDim(4) --> <4,2,4> - // @param dim - // @return + /// \brief Insert new dim at index 0. For example, <2,4> --> PrependDim(4) --> <4,2,4> + /// \param[in] dim + /// \return TensorShape PrependDim(dsize_t dim) const; - // Insert a new dim at the end of the shape. 
For example, <2,4> --> AppendDim(4) --> <2,4,4> - // @param dim - // @return + /// \brief Insert a new dim at the end of the shape. For example, <2,4> --> AppendDim(4) --> <2,4,4> + /// \param[in] dim + /// \return TensorShape AppendDim(dsize_t dim) const; - // Create a shape based on OpenCV shape and type - // @param cv_size - // @param type int that represent the type in OpenCV, example CV_8U, CV_64S + /// \brief Create a shape based on OpenCV shape and type + /// \param[in] cv_size + /// \param[in] type int that represent the type in OpenCV, example CV_8U, CV_64S TensorShape(cv::MatSize cv_size, uint32_t type); dsize_t Size() const { return raw_shape_.size(); } @@ -123,47 +129,50 @@ class TensorShape { return raw_shape_[index]; } - // Return the Shape as a vector - // @return + /// \brief Return the Shape as a vector + /// \return std::vector AsVector() const; - // Returns the class info as a string - // @return + /// \brief Returns the class info as a string + /// \return std::string ToString() const { std::stringstream ss; ss << *this; return ss.str(); } - // Actual print function used by operator<< - // @param out output string stream + /// \brief Actual print function used by operator<< + /// \param out output string stream void Print(std::ostream &out) const; - // << Stream output operator overload - // @notes This allows you to print the info using stream operators - // @param out - reference to the output stream being overloaded - // @param rO - reference to the TensorShape to display - // @return - the output stream must be returned + /// \brief << Stream output operator overload + /// This allows you to print the info using stream operators + /// \param[in] out - reference to the output stream being overloaded + /// \param[in] rO - reference to the TensorShape to display + /// \return - the output stream must be returned friend std::ostream &operator<<(std::ostream &out, const TensorShape &so) { so.Print(out); return out; } +#ifdef ENABLE_PYTHON 
py::list AsPyList(); +#endif - // Checks if the given index is a valid index for this tensor. - // For example: Tensor<3,4> Index<1,1> is valid. But Index<4,1> or <1> are not. - // @param index - // @return bool + /// \brief Checks if the given index is a valid index for this tensor. + /// For example: Tensor<3,4> Index<1,1> is valid. But Index<4,1> or <1> are not. + /// \param[in] index + /// \return bool bool IsValidIndex(const std::vector &index) const; TensorShape Squeeze() const; std::vector Strides() const; - // Returns the location of the item assuming row major memory layout. - // @param index - // @return + /// \brief Returns the location of the item assuming row major memory layout. + /// \param[in] index + /// \param[out] flat_index + /// \return Status ToFlatIndex(const std::vector &index, dsize_t *flat_index) const; private: @@ -174,11 +183,11 @@ class TensorShape { // Vector to keep the strides of the shape. The size is rank+1 std::vector strides_; - // Internal utility function to iterate over a list, check if the dim is valid and then insert it into the shape. - // @tparam T list - // @param list Iterable list - // @return true if the shape is valid and no overflow would be generated when counting the number of elements. - // False otherwise. + /// \brief Internal utility function to iterate over a list, + /// check if the dim is valid and then insert it into the shape. + /// \param[in] list Iterable list + /// \return true if the shape is valid and no overflow would be generated when counting the number of elements. + /// False otherwise. 
template void AddListToShape(const T &list); }; diff --git a/mindspore/ccsrc/dataset/engine/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/CMakeLists.txt similarity index 74% rename from mindspore/ccsrc/dataset/engine/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/engine/CMakeLists.txt index 66f95d09266..e3ead16d057 100644 --- a/mindspore/ccsrc/dataset/engine/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/engine/CMakeLists.txt @@ -2,6 +2,7 @@ add_subdirectory(datasetops) add_subdirectory(opt) add_subdirectory(gnn) add_subdirectory(perf) +add_subdirectory(cache) if (ENABLE_TDTQUE) add_subdirectory(tdt) endif () @@ -17,7 +18,9 @@ add_library(engine OBJECT target_include_directories(engine PRIVATE ${pybind11_INCLUDE_DIRS}) if (ENABLE_TDTQUE) - add_dependencies(engine engine-datasetops engine-datasetops-source engine-tdt engine-opt engine-gnn engine-perf) -else() - add_dependencies(engine engine-datasetops engine-datasetops-source engine-opt engine-gnn engine-perf) + add_dependencies(engine engine-datasetops engine-datasetops-source engine-tdt engine-opt engine-gnn engine-perf + engine-cache-client engine-cache-server) +else () + add_dependencies(engine engine-datasetops engine-datasetops-source engine-opt engine-gnn engine-perf + engine-cache-client engine-cache-server) endif () diff --git a/mindspore/ccsrc/dataset/api/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/cache/CMakeLists.txt similarity index 54% rename from mindspore/ccsrc/dataset/api/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/engine/cache/CMakeLists.txt index 194aeed4572..5e7ebea1762 100644 --- a/mindspore/ccsrc/dataset/api/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/CMakeLists.txt @@ -1,7 +1,8 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) -add_library(APItoPython OBJECT - 
de_pipeline.cc - python_bindings.cc - ) -target_include_directories(APItoPython PRIVATE ${pybind11_INCLUDE_DIRS}) +add_library(engine-cache-client OBJECT + cache_client.cc + cache_request.cc) +add_library(engine-cache-server OBJECT + cache_service.cc + cache_server.cc) diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_client.cc b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_client.cc new file mode 100644 index 00000000000..04746131bb0 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_client.cc @@ -0,0 +1,208 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include "minddata/dataset/engine/cache/cache_client.h" +#include "minddata/dataset/engine/cache/cache_request.h" +#include "minddata/dataset/util/bit.h" + +namespace mindspore { +namespace dataset { + +// Constructor +CacheClient::CacheClient(uint32_t session_id, uint64_t cache_mem_sz, bool spill) + : server_connection_id_(0), session_id_(session_id), cache_crc_(0), cache_mem_sz_(cache_mem_sz), spill_(spill) {} + +// print method for display cache details +void CacheClient::Print(std::ostream &out) const { + out << " Session id: " << session_id_ << "\n Cache crc: " << cache_crc_ + << "\n Server cache id: " << server_connection_id_ << "\n Cache mem size: " << cache_mem_sz_ + << "\n Spilling: " << std::boolalpha << spill_; +} + +Status CacheClient::WriteRow(const TensorRow &row, row_id_type *row_id_from_server) const { + CacheRowRequest rq(server_connection_id_, cookie()); + RETURN_IF_NOT_OK(rq.SerializeCacheRowRequest(row)); + RETURN_IF_NOT_OK(CacheServer::GetInstance().PushRequest(&rq)); + RETURN_IF_NOT_OK(rq.Wait()); + if (row_id_from_server != nullptr) { + *row_id_from_server = rq.GetRowIdAfterCache(); + } + return Status::OK(); +} + +Status CacheClient::WriteBuffer(std::unique_ptr &&in) const { + std::unique_ptr db_ptr = std::move(in); + auto num_rows = db_ptr->NumRows(); + std::vector all_rows; + if (num_rows > 0) { + all_rows.reserve(num_rows); + // Break down the DataBuffer into TensorRow. We will send the requests async + // and then do a final wait. + MemGuard rq_arr; + RETURN_IF_NOT_OK(rq_arr.allocate(num_rows, server_connection_id_, cookie())); + CacheServer &cs = CacheServer::GetInstance(); + for (auto i = 0; i < num_rows; ++i) { + TensorRow row; + auto rq = rq_arr[i]; + RETURN_IF_NOT_OK(db_ptr->PopRow(&row)); + RETURN_IF_NOT_OK(rq->SerializeCacheRowRequest(row)); + RETURN_IF_NOT_OK(cs.PushRequest(rq)); + // We can't let row go out of scope. Otherwise it will free all the tensor memory. + // So park it in the vector. 
When this function go out of scope, its memory + // will be freed. + all_rows.push_back(std::move(row)); + } + // Now we wait for the requests to be done. + for (auto i = 0; i < num_rows; ++i) { + auto rq = rq_arr[i]; + RETURN_IF_NOT_OK(rq->Wait()); + } + } + return Status::OK(); +} + +Status CacheClient::GetRows(const std::vector &row_id, TensorTable *out) const { + RETURN_UNEXPECTED_IF_NULL(out); + BatchFetchRequest rq(server_connection_id_, row_id); + RETURN_IF_NOT_OK(CacheServer::GetInstance().PushRequest(&rq)); + RETURN_IF_NOT_OK(rq.Wait()); + RETURN_IF_NOT_OK(rq.RestoreRows(out)); + return Status::OK(); +} + +Status CacheClient::CreateCache(uint32_t tree_crc, bool generate_id) { + UniqueLock lck(&mux_); + // To create a cache, we identify ourself at the client by: + // - the shared session id + // - a crc for the tree nodes from the cache downward + // Pack these 2 into a single 64 bit request id + // + // Consider this example: + // tree1: tfreader --> map(decode) --> cache (session id = 1, crc = 123) --> batch + // tree2: cifar10 --> map(rotate) --> cache (session id = 1, crc = 456) --> batch + // These are different trees in a single session, but the user wants to share the cache. + // This is not allowed because the data of these caches are different. + // + // Consider this example: + // tree1: tfreader --> map(decode) --> cache (session id = 1, crc = 123) --> batch + // tree2: tfreader --> map(decode) --> cache (session id = 1, crc = 123) --> map(rotate) --> batch + // These are different trees in the same session, but the cached data is the same, so it is okay + // to allow the sharing of this cache between these pipelines. + + // The CRC is computed by the tree prepare phase and passed to this function when creating the cache. + // If we already have a server_connection_id_, then it means this same cache client has already been used + // to create a cache and some other tree is trying to use the same cache. 
+ // That is allowed, however the crc better match! + if (server_connection_id_) { + if (cache_crc_ != tree_crc) { + RETURN_STATUS_UNEXPECTED("Attempt to re-use a cache for a different tree!"); + } + // Check the state of the server. For non-mappable case where there is a build phase and a fetch phase, we should + // skip the build phase. + lck.Unlock(); // GetStat will grab the mutex again. So unlock it to prevent deadlock. + CacheClient::ServiceStat stat{}; + RETURN_IF_NOT_OK(GetStat(&stat)); + if (stat.cache_service_state == static_cast(CacheService::State::kFetchPhase)) { + return Status(StatusCode::kDuplicateKey, __LINE__, __FILE__, "Not an error and we should bypass the build phase"); + } + } else { + cache_crc_ = tree_crc; // It's really a new cache we're creating so save our crc in the client + // Combine the session and crc. This will form our client cache identifier. + connection_id_type connection_identification = (static_cast(session_id_) << 32) | cache_crc_; + // Now execute the cache create request using this identifier and other configs + BaseRequest::CreateCacheFlag createFlag = BaseRequest::CreateCacheFlag::kNone; + if (spill_) { + createFlag |= BaseRequest::CreateCacheFlag::kSpillToDisk; + } + if (generate_id) { + createFlag |= BaseRequest::CreateCacheFlag::kGenerateRowId; + } + CreationCacheRequest rq(connection_identification, cache_mem_sz_, createFlag); + RETURN_IF_NOT_OK(CacheServer::GetInstance().PushRequest(&rq)); + Status rc = rq.Wait(); + if (rc.IsOk() || rc.get_code() == StatusCode::kDuplicateKey) { + server_connection_id_ = rq.GetServerConnectionId(); + if (rc.IsOk()) { + // The 1st guy creating the cache will get a cookie back. + // But this object may be shared among pipelines and we don't want + // overwrite it. + cookie_ = rq.cookie(); + } + } + // We are not resetting the Duplicate key return code. We are passing it back to the CacheOp. This will tell the + // CacheOp to bypass the build phase. 
+ return rc; + } + return Status::OK(); +} + +Status CacheClient::PurgeCache() { + UniqueLock lck(&mux_); + PurgeCacheRequest rq(server_connection_id_); + RETURN_IF_NOT_OK(CacheServer::GetInstance().PushRequest(&rq)); + return rq.Wait(); +} + +Status CacheClient::DestroyCache() { + UniqueLock lck(&mux_); + DestroyCacheRequest rq(server_connection_id_); + RETURN_IF_NOT_OK(CacheServer::GetInstance().PushRequest(&rq)); + return rq.Wait(); +} + +Status CacheClient::GetStat(ServiceStat *stat) { + SharedLock lck(&mux_); + RETURN_UNEXPECTED_IF_NULL(stat); + GetStatRequest rq(server_connection_id_); + RETURN_IF_NOT_OK(CacheServer::GetInstance().PushRequest(&rq)); + RETURN_IF_NOT_OK(rq.Wait()); + stat->num_disk_cached = rq.GetNumDiskCached(); + stat->num_mem_cached = rq.GetNumMemCached(); + stat->min_row_id = rq.GetMinRowId(); + stat->max_row_id = rq.GetMaxRowId(); + stat->cache_service_state = rq.GetState(); + return Status::OK(); +} + +Status CacheClient::CacheSchema(const std::unordered_map &map) { + SharedLock lck(&mux_); + CacheSchemaRequest rq(server_connection_id_); + RETURN_IF_NOT_OK(rq.SerializeCacheSchemaRequest(map)); + RETURN_IF_NOT_OK(CacheServer::GetInstance().PushRequest(&rq)); + RETURN_IF_NOT_OK(rq.Wait()); + return Status::OK(); +} + +Status CacheClient::FetchSchema(std::unordered_map *map) { + SharedLock lck(&mux_); + RETURN_UNEXPECTED_IF_NULL(map); + FetchSchemaRequest rq(server_connection_id_); + RETURN_IF_NOT_OK(CacheServer::GetInstance().PushRequest(&rq)); + RETURN_IF_NOT_OK(rq.Wait()); + *map = rq.GetColumnMap(); + return Status::OK(); +} + +Status CacheClient::BuildPhaseDone() const { + SharedLock lck(&mux_); + BuildPhaseDoneRequest rq(server_connection_id_, cookie()); + RETURN_IF_NOT_OK(CacheServer::GetInstance().PushRequest(&rq)); + RETURN_IF_NOT_OK(rq.Wait()); + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_client.h 
b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_client.h new file mode 100644 index 00000000000..f25db875782 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_client.h @@ -0,0 +1,141 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_ENGINE_CACHE_CLIENT_H_ +#define DATASET_ENGINE_CACHE_CLIENT_H_ + +#include +#include +#include +#include +#include +#include + +#include "./de_tensor_generated.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/cache/cache_server.h" +#include "minddata/dataset/util/lock.h" + +namespace mindspore { +namespace dataset { +/// \brief A CacheClient is a bridge between a DatasetOp and a CacheServer. All communications are through +/// a CacheClient. Typical tasks including like creating a cache service, cache a data buffer, restore a previously +/// rows, etc. +class CacheClient { + public: + /// \brief Constructor + /// \param session_id A user assigned session id for the current pipeline + /// \param cache_mem_sz Size of the memory set aside for the row caching. 
0 for unlimited + /// \param spill Spill to disk if out of memory + CacheClient(uint32_t session_id, uint64_t cache_mem_sz, bool spill); + + /// \brief Destructor + ~CacheClient() = default; + + /// \brief Getter function for returning the current session id + /// \return session id + uint64_t session_id() const { return session_id_; } + + /// \brief Send a TensorRow to the cache server + /// \param[in] row + /// \param[out] row_id_from_server Optional. The row id assigned by the server for non-mappable dataset + /// \return return code + Status WriteRow(const TensorRow &row, row_id_type *row_id_from_server = nullptr) const; + + /// \brief Send a DataBuffer to the cache server + /// \param in Unique pointer of the DataBuffer to be cached + /// \return return code + Status WriteBuffer(std::unique_ptr &&in) const; + + /// \brief Fetch a list of rows from the cache server. An empty TensorRow will be returned if there is + /// any cache miss + /// \param row_id A vector of row id's + /// \param out A TensorTable of TensorRows. + /// \return return code + Status GetRows(const std::vector &row_id, TensorTable *out) const; + + /// \brief Create a cache. + /// \param tree_crc A crc that was generated during tree prepare phase + /// \param generate_id Let the cache service generate row id + /// \return Status object + Status CreateCache(uint32_t tree_crc, bool generate_id); + + /// \brief Purge a cache. Cache can be reused after reset. + /// \return Status object + Status PurgeCache(); + + /// \brief Destroy a cache. Like Purge but the cache is deleted and can't be reused. + /// \return Status object + Status DestroyCache(); + + /// \brief Get the statistics from a cache. 
+ /// \param[in/out] Pointer to a pre-allocated ServiceStat object + /// \return Status object + struct ServiceStat { + int64_t num_mem_cached; + int64_t num_disk_cached; + row_id_type min_row_id; + row_id_type max_row_id; + int8_t cache_service_state; + }; + Status GetStat(ServiceStat *); + + /// \brief Cache the schema at the cache server + /// \param map The unordered map of the schema + /// \return Status object + Status CacheSchema(const std::unordered_map &map); + + /// \brief Fetch the schema from the cache server + /// \param map Pointer to pre-allocated map object + /// \return Status object. + Status FetchSchema(std::unordered_map *map); + + /// \brief Change the state from build phase to read phase. Applicable to non-mappable dataset only. Only the cache + /// client that holds cookie can be allowed to make this request + /// \return Status object + Status BuildPhaseDone() const; + + /// \brief A print method typically used for debugging + /// \param out The output stream to write output to + void Print(std::ostream &out) const; + + /// \brief Stream output operator overload + /// \return the output stream must be returned + friend std::ostream &operator<<(std::ostream &out, const CacheClient &cc) { + cc.Print(out); + return out; + } + + /// \brief Every cache server has a cookie which uniquely identifies the CacheClient that creates it. + /// \return Cookie + std::string cookie() const { return cookie_; } + + private: + mutable RWLock mux_; + uint64_t cache_mem_sz_; + bool spill_; + // The session_id_ and cache_crc_ work together to uniquely identify this particular cache and allow + // sharing of the cache. + uint32_t session_id_; + uint32_t cache_crc_; + // The server_connection_id_ is the actual id we use for operations after the cache is built + connection_id_type server_connection_id_; + // Some magic cookie returned from the cache server. 
+ std::string cookie_; +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_ENGINE_CACHE_CLIENT_H_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_request.cc b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_request.cc new file mode 100644 index 00000000000..3b7fc057a2c --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_request.cc @@ -0,0 +1,223 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ +#include "minddata/dataset/engine/cache/cache_request.h" + +namespace mindspore { +namespace dataset { + +Status CacheRowRequest::SerializeCacheRowRequest(const TensorRow &row) { + buffers_.reserve(row.size() + 1); + RETURN_IF_NOT_OK(SerializeTensorRowHeader(row)); + buffers_.push_back(fbb_->GetBufferPointer()); + for (const auto &ts : row) { + buffers_.push_back(ts->GetBuffer()); + } + return Status::OK(); +} + +Status CacheRowRequest::SerializeTensorRowHeader(const TensorRow &row) { + try { + fbb_ = std::make_shared(); + std::vector> v; + std::vector tensor_sz; + v.reserve(row.size()); + tensor_sz.reserve(row.size()); + // We will go through each column in the row. 
+ for (const std::shared_ptr &ts_ptr : row) { + flatbuffers::Offset ts_off; + RETURN_IF_NOT_OK(SerializeOneTensorMeta(ts_ptr, &ts_off)); + v.push_back(ts_off); + tensor_sz.push_back(ts_ptr->SizeInBytes()); + } + auto column_off = fbb_->CreateVector(v); + auto data_sz_off = fbb_->CreateVector(tensor_sz); + TensorRowHeaderMsgBuilder row_builder(*fbb_); + row_builder.add_column(column_off); + row_builder.add_data_sz(data_sz_off); + // Pass the row_id even if it may not be known. + row_builder.add_row_id(row.getId()); + row_builder.add_size_of_this(-1); // fill in later after we call Finish. + auto out = row_builder.Finish(); + fbb_->Finish(out); + // Now go back to fill in size_of_this in the flat buffer. + auto msg = GetMutableTensorRowHeaderMsg(fbb_->GetBufferPointer()); + auto success = msg->mutate_size_of_this(fbb_->GetSize()); + if (!success) { + RETURN_STATUS_UNEXPECTED("Unable to set size_of_this"); + } + return Status::OK(); + } catch (const std::bad_alloc &e) { + return Status(StatusCode::kOutOfMemory, __LINE__, __FILE__); + } +} + +Status CacheRowRequest::SerializeOneTensorMeta(const std::shared_ptr &ts_ptr, + flatbuffers::Offset *out_off) { + RETURN_UNEXPECTED_IF_NULL(out_off); + const Tensor *ts = ts_ptr.get(); + auto shape_off = fbb_->CreateVector(ts->shape().AsVector()); + const auto ptr = ts->GetBuffer(); + if (ptr == nullptr) { + RETURN_STATUS_UNEXPECTED("Tensor buffer is null"); + } + auto src = ts->type().value(); + TensorType dest; +#define CASE(t) \ + case DataType::t: \ + dest = TensorType::TensorType_##t; \ + break + // Map the type to fill in the flat buffer. + switch (src) { + CASE(DE_BOOL); + CASE(DE_INT8); + CASE(DE_UINT8); + CASE(DE_INT16); + CASE(DE_UINT16); + CASE(DE_INT32); + CASE(DE_UINT32); + CASE(DE_INT64); + CASE(DE_UINT64); + CASE(DE_FLOAT16); + CASE(DE_FLOAT32); + CASE(DE_FLOAT64); + CASE(DE_STRING); + default: + MS_LOG(ERROR) << "Unknown tensor. 
Dumping content:\n" << *ts; + RETURN_STATUS_UNEXPECTED("Unknown type"); + } +#undef CASE + + TensorMetaMsgBuilder ts_builder(*fbb_); + ts_builder.add_dims(shape_off); + ts_builder.add_type(dest); + auto ts_off = ts_builder.Finish(); + *out_off = ts_off; + return Status::OK(); +} + +Status BatchFetchRequest::RestoreOneTensor(const TensorMetaMsg *col_ts, const ReadableSlice &data, + std::shared_ptr *out) { + RETURN_UNEXPECTED_IF_NULL(col_ts); + auto shape_in = col_ts->dims(); + auto type_in = col_ts->type(); + std::vector v; + v.reserve(shape_in->size()); + v.assign(shape_in->begin(), shape_in->end()); + TensorShape shape(v); + DataType::Type dest = DataType::DE_UNKNOWN; +#define CASE(t) \ + case TensorType_##t: \ + dest = DataType::Type::t; \ + break + + switch (type_in) { + CASE(DE_BOOL); + CASE(DE_INT8); + CASE(DE_UINT8); + CASE(DE_INT16); + CASE(DE_UINT16); + CASE(DE_INT32); + CASE(DE_UINT32); + CASE(DE_INT64); + CASE(DE_UINT64); + CASE(DE_FLOAT16); + CASE(DE_FLOAT32); + CASE(DE_FLOAT64); + CASE(DE_STRING); + } +#undef CASE + + DataType type(dest); + std::shared_ptr ts = + std::make_shared(shape, type, static_cast(data.GetPointer()), data.GetSize()); + // Next we restore the real data which can be embedded or stored separately. + if (ts->SizeInBytes() != data.GetSize()) { + MS_LOG(ERROR) << "Unexpected length. Read " << data.GetSize() << ". Expected " << ts->SizeInBytes() << ".\n" + << "Dumping tensor\n" + << *ts << "\n"; + RETURN_STATUS_UNEXPECTED("Length mismatch. 
See log file for details."); + } + *out = std::move(ts); + return Status::OK(); +} + +Status BatchFetchRequest::RestoreRows(TensorTable *out) { + RETURN_UNEXPECTED_IF_NULL(out); + auto num_elements = row_id_.size(); + auto *offset_array = reinterpret_cast(mem_.GetPointer()); + TensorTable tbl; + tbl.reserve(num_elements); + ReadableSlice all(mem_.GetPointer(), mem_.GetSizeInBytes()); + for (auto i = 0; i < num_elements; ++i) { + auto len = offset_array[i + 1] - offset_array[i]; + TensorRow row; + row.setId(row_id_.at(i)); + if (len > 0) { + ReadableSlice row_data(all, offset_array[i], len); + // Next we de-serialize flat buffer to get back each column + auto msg = GetTensorRowHeaderMsg(row_data.GetPointer()); + auto msg_sz = msg->size_of_this(); + // Start of the tensor data + auto ts_offset = msg_sz; + row.reserve(msg->column()->size()); + for (auto k = 0; k < msg->column()->size(); ++k) { + auto col_ts = msg->column()->Get(k); + std::shared_ptr ts; + ReadableSlice data(row_data, ts_offset, msg->data_sz()->Get(k)); + RETURN_IF_NOT_OK(RestoreOneTensor(col_ts, data, &ts)); + row.push_back(ts); + ts_offset += data.GetSize(); + } + } + tbl.push_back(std::move(row)); + } + *out = std::move(tbl); + return Status::OK(); +} + +Status CacheSchemaRequest::SerializeCacheSchemaRequest(const std::unordered_map &map) { + try { + fbb_ = std::make_shared(); + std::vector> v; + v.reserve(map.size()); + for (auto &column : map) { + auto c = CreateColumnNameMsg(*fbb_, fbb_->CreateString(column.first), column.second); + v.push_back(c); + } + auto v_off = fbb_->CreateVector(v); + auto final_off = CreateSchemaMsg(*fbb_, v_off); + fbb_->Finish(final_off); + buf_ = fbb_->GetBufferPointer(); + len_of_buf_ = fbb_->GetSize(); + return Status::OK(); + } catch (const std::bad_alloc &e) { + return Status(StatusCode::kOutOfMemory, __LINE__, __FILE__); + } +} + +std::unordered_map FetchSchemaRequest::GetColumnMap() { + if (column_name_id_map_.empty()) { + auto *map_msg = 
flatbuffers::GetRoot(mem_.GetPointer()); + auto v = map_msg->column(); + for (auto i = 0; i < v->size(); ++i) { + auto col = map_msg->column()->Get(i); + column_name_id_map_.emplace(col->name()->str(), col->id()); + } + } + return column_name_id_map_; +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_request.h b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_request.h new file mode 100644 index 00000000000..3d0edc6dd87 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_request.h @@ -0,0 +1,225 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ +#ifndef DATASET_ENGINE_CACHE_REQ_H_ +#define DATASET_ENGINE_CACHE_REQ_H_ + +#include +#include +#include +#include +#include +#include + +#include "./de_tensor_generated.h" +#include "minddata/dataset/core/tensor_row.h" +#include "minddata/dataset/util/slice.h" +#include "minddata/dataset/util/wait_post.h" + +namespace mindspore { +namespace dataset { +/// \brief CacheClient communicates with CacheServer using Requests. +class BaseRequest { + public: + // Request types + enum class RequestType : int16_t { + kCacheRow = 0, + kBatchFetchRows = 1, + kCreateCache = 2, + kPurgeCache = 3, + kDestroyCache = 4, + kGetStat = 5, + kCacheSchema = 6, + kFetchSchema = 7, + kBuildPhaseDone = 8, + // Add new request before it. 
+ kRequestUnknown = 32767 + }; + // For kCreateCache + enum class CreateCacheFlag : uint32_t { kNone = 0, kSpillToDisk = 1, kGenerateRowId = 1u << 1L }; + friend class CacheServer; + /// \brief Base class of a cache server request + /// \param connection_id A combination of session id and crc that uniquely identifies a connection. + /// \param type Type of the request + explicit BaseRequest(connection_id_type connection_id, RequestType type) + : type_(type), connection_id_(connection_id) {} + virtual ~BaseRequest() = default; + /// \brief Wait for the completion of a request + /// \return Status returned from the cache server + Status Wait() { + RETURN_IF_NOT_OK(wp_.Wait()); + return rc_; + } + + /// \brief Getter function of the current connection id + /// \return Connection id + connection_id_type GetServerConnectionId() const { return connection_id_; } + + private: + RequestType type_; + connection_id_type connection_id_; + Status rc_; + WaitPost wp_; +}; +/// \brief Request to cache a single TensorRow +class CacheRowRequest : public BaseRequest { + public: + friend class CacheServer; + explicit CacheRowRequest(connection_id_type connection_id, const std::string &cookie) + : BaseRequest(connection_id, RequestType::kCacheRow), row_id_from_server_(-1), cookie_(cookie) {} + ~CacheRowRequest() = default; + + /// \brief Serialize a TensorRow for streaming to the cache server + /// \param row TensorRow + /// \return Status object + Status SerializeCacheRowRequest(const TensorRow &row); + /// \brief Return the row id assigned to this row for non-mappable dataset + /// \return row id of the cached row + row_id_type GetRowIdAfterCache() { return row_id_from_server_; } + + private: + std::shared_ptr fbb_; + row_id_type row_id_from_server_; + std::vector buffers_; + std::string cookie_; + + /// \brief Private function to serialize one TensorRow + /// \param row TensorRow + /// \return Status object + Status SerializeTensorRowHeader(const TensorRow &row); + /// \brief 
Private function to serialize one Tensor + /// \param ts_ptr Tensor + /// \return Status object + Status SerializeOneTensorMeta(const std::shared_ptr &ts_ptr, flatbuffers::Offset *out_off); +}; +/// \brief Request to fetch rows in batch +class BatchFetchRequest : public BaseRequest { + public: + friend class CacheServer; + friend class CacheService; + BatchFetchRequest(connection_id_type connection_id, const std::vector &row_id) + : BaseRequest(connection_id, RequestType::kBatchFetchRows), row_id_(row_id) {} + Status RestoreRows(TensorTable *out); + + private: + std::vector row_id_; + MemGuard mem_; + Status RestoreOneTensor(const TensorMetaMsg *col_ts, const ReadableSlice &data, std::shared_ptr *out); +}; +/// \brief Request to create a cache for the current connection +class CreationCacheRequest : public BaseRequest { + public: + friend class CacheServer; + /// \brief Constructor + /// \param connection_id + /// \param cache_mem_sz Maximum memory assigned for this connection. 0 means unlimited + /// \param flag Attributes of the cache. + explicit CreationCacheRequest(connection_id_type connection_id, uint64_t cache_mem_sz, + CreateCacheFlag flag = CreateCacheFlag::kNone) + : BaseRequest(connection_id, RequestType::kCreateCache), cache_mem_sz(cache_mem_sz), flag_(flag) {} + + std::string cookie() const { return cookie_; } + + private: + uint64_t cache_mem_sz; + CreateCacheFlag flag_; + std::string cookie_; +}; +/// \brief Request to purge a cache. 
+class PurgeCacheRequest : public BaseRequest { + public: + friend class CacheServer; + explicit PurgeCacheRequest(connection_id_type connection_id) : BaseRequest(connection_id, RequestType::kPurgeCache) {} +}; +/// \brief Request to destroy a cache +class DestroyCacheRequest : public BaseRequest { + public: + friend class CacheServer; + explicit DestroyCacheRequest(connection_id_type connection_id) + : BaseRequest(connection_id, RequestType::kDestroyCache) {} +}; +/// \brief Obtain the statistics of the current connection +class GetStatRequest : public BaseRequest { + public: + friend class CacheServer; + friend class CacheService; + explicit GetStatRequest(connection_id_type connection_id) : BaseRequest(connection_id, RequestType::kGetStat) {} + row_id_type GetMinRowId() const { + auto *msg = flatbuffers::GetRoot(mem_.GetPointer()); + return msg->min_row_id(); + } + row_id_type GetMaxRowId() const { + auto *msg = flatbuffers::GetRoot(mem_.GetPointer()); + return msg->max_row_id(); + } + int64_t GetNumMemCached() const { + auto *msg = flatbuffers::GetRoot(mem_.GetPointer()); + return msg->num_mem_cached(); + } + int64_t GetNumDiskCached() const { + auto *msg = flatbuffers::GetRoot(mem_.GetPointer()); + return msg->num_disk_cached(); + } + uint8_t GetState() const { + auto *msg = flatbuffers::GetRoot(mem_.GetPointer()); + return msg->state(); + } + + private: + MemGuard mem_; +}; +/// \brief Request to cache a schema +class CacheSchemaRequest : public BaseRequest { + public: + friend class CacheServer; + explicit CacheSchemaRequest(connection_id_type connection_id) + : BaseRequest(connection_id, RequestType::kCacheSchema), buf_(nullptr), len_of_buf_(0) {} + ~CacheSchemaRequest() = default; + + Status SerializeCacheSchemaRequest(const std::unordered_map &map); + const void *GetBuffer() const { return buf_; } + + private: + std::shared_ptr fbb_; + const void *buf_; + int64_t len_of_buf_; +}; +/// \brief Request to fetch a schema +class FetchSchemaRequest : public 
BaseRequest { + public: + friend class CacheServer; + explicit FetchSchemaRequest(connection_id_type connection_id) + : BaseRequest(connection_id, RequestType::kFetchSchema) {} + ~FetchSchemaRequest() = default; + + std::unordered_map GetColumnMap(); + + private: + MemGuard mem_; + std::unordered_map column_name_id_map_; +}; +/// \brief Request to change a cache from build phase to read phase. Applies to non-mappable cache only. +class BuildPhaseDoneRequest : public BaseRequest { + public: + friend class CacheServer; + BuildPhaseDoneRequest(connection_id_type connection_id, const std::string &cookie) + : BaseRequest(connection_id, RequestType::kBuildPhaseDone), cookie_(cookie) {} + + private: + std::string cookie_; +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_ENGINE_CACHE_SERVICE_H_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_server.cc b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_server.cc new file mode 100644 index 00000000000..c9fb6ecab13 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_server.cc @@ -0,0 +1,252 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ +#include "minddata/dataset/engine/cache/cache_server.h" +#include "minddata/dataset/engine/cache/cache_service.h" +#include "minddata/dataset/engine/cache/cache_request.h" +#include "minddata/dataset/util/bit.h" + +namespace mindspore { +namespace dataset { +Status CacheServer::DoServiceStart() { + if (!top_.empty()) { + Path spill(top_); + RETURN_IF_NOT_OK(spill.CreateDirectories()); + MS_LOG(INFO) << "CacheServer will use disk folder: " << top_; + } + RETURN_IF_NOT_OK(vg_.ServiceStart()); + cache_q_ = std::make_shared>(1024); + RETURN_IF_NOT_OK(cache_q_->Register(&vg_)); + auto f = std::bind(&CacheServer::ServerRequest, this); + // Spawn a a few threads to serve the request. + for (auto i = 0; i < num_workers_; ++i) { + RETURN_IF_NOT_OK(vg_.CreateAsyncTask("Cache server", f)); + } + return Status::OK(); +} + +Status CacheServer::DoServiceStop() { + Status rc; + Status rc2; + // First stop all the threads. + RETURN_IF_NOT_OK(vg_.ServiceStop()); + // Clean up all the caches if any. 
+ UniqueLock lck(&rwLock_); + auto it = all_caches_.begin(); + while (it != all_caches_.end()) { + auto cs = std::move(it->second); + rc2 = cs->ServiceStop(); + if (rc2.IsError()) { + rc = rc2; + } + ++it; + } + return rc; +} + +CacheService *CacheServer::GetService(connection_id_type id) const { + SharedLock lck(&rwLock_); + auto it = all_caches_.find(id); + if (it != all_caches_.end()) { + return it->second.get(); + } + return nullptr; +} + +Status CacheServer::CreateService(connection_id_type connection_id, uint64_t cache_mem_sz, + BaseRequest::CreateCacheFlag flag, std::string *out_cookie) { + // We can't do spilling unless this server is setup with a spill path in the first place + bool spill = (flag & BaseRequest::CreateCacheFlag::kSpillToDisk) == BaseRequest::CreateCacheFlag::kSpillToDisk; + bool generate_id = + (flag & BaseRequest::CreateCacheFlag::kGenerateRowId) == BaseRequest::CreateCacheFlag::kGenerateRowId; + if (spill && top_.empty()) { + RETURN_STATUS_UNEXPECTED("Server is not set up with spill support."); + } + RETURN_UNEXPECTED_IF_NULL(out_cookie); + *out_cookie = ""; + // Before creating the cache, first check if this is a request for a shared usage of an existing cache + // If two CreateService come in with identical connection_id, we need to serialize the create. + // The first create will be successful and be given a special cookie. + UniqueLock lck(&rwLock_); + auto end = all_caches_.end(); + auto it = all_caches_.find(connection_id); + if (it == end) { + std::unique_ptr cs; + try { + cs = std::make_unique(cache_mem_sz, spill ? 
top_ : "", generate_id); + RETURN_IF_NOT_OK(cs->ServiceStart()); + *out_cookie = cs->cookie(); + all_caches_.emplace(connection_id, std::move(cs)); + } catch (const std::bad_alloc &e) { + return Status(StatusCode::kOutOfMemory); + } + } else { + MS_LOG(INFO) << "Duplicate request for " + std::to_string(connection_id) + " to create cache service"; + // We can return OK but we will return a duplicate key so user can act accordingly to either ignore it + // treat it as OK. + return Status(StatusCode::kDuplicateKey); + } + return Status::OK(); +} + +/// This is the main loop the cache server thread(s) are running. +/// Each thread will pop a request and save the result in the same request. +/// The sender will wait on the wait post in the request. Once the request +/// is fulfilled, the server thread will do a post signalling the request is +/// is processed. +/// \return +Status CacheServer::ServerRequest() { + TaskManager::FindMe()->Post(); + // Loop forever until we are interrupted. + while (true) { + BaseRequest *base_rq = nullptr; + RETURN_IF_NOT_OK(cache_q_->PopFront(&base_rq)); + auto cs = GetService(base_rq->connection_id_); + // Except for creating a new session, we expect cs is not null. 
+ switch (base_rq->type_) { + case BaseRequest::RequestType::kCacheRow: { + if (cs == nullptr) { + std::string errMsg = "Cache id " + std::to_string(base_rq->connection_id_) + " not found"; + base_rq->rc_ = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, errMsg); + } else { + auto *rq = reinterpret_cast(base_rq); + // Only if the cookie matches, we can accept insert into this cache that has a build phase + if (!cs->HasBuildPhase() || rq->cookie_ == cs->cookie()) { + rq->rc_ = cs->CacheRow(rq->buffers_, &rq->row_id_from_server_); + } else { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Cookie mismatch"); + } + } + break; + } + case BaseRequest::RequestType::kBatchFetchRows: { + if (cs == nullptr) { + std::string errMsg = "Cache id " + std::to_string(base_rq->connection_id_) + " not found"; + base_rq->rc_ = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, errMsg); + } else { + auto *rq = reinterpret_cast(base_rq); + rq->rc_ = cs->BatchFetch(rq->row_id_, &rq->mem_); + } + break; + } + case BaseRequest::RequestType::kCreateCache: { + // If the cache is already created we still need to run the creation so that we do sanity checks on the + // client id and return the cache id back to the user. + auto *rq = reinterpret_cast(base_rq); + rq->rc_ = CreateService(rq->connection_id_, rq->cache_mem_sz, rq->flag_, &rq->cookie_); + break; + } + case BaseRequest::RequestType::kPurgeCache: { + if (cs != nullptr) { + base_rq->rc_ = cs->Purge(); + } else { + // it is already purged. Ignore it. + base_rq->rc_ = Status::OK(); + } + break; + } + case BaseRequest::RequestType::kDestroyCache: { + if (cs != nullptr) { + // We need a strong lock to protect the map. + connection_id_type id = base_rq->connection_id_; + UniqueLock lck(&rwLock_); + // std::map will invoke the constructor of CacheService. So we don't need to do anything here. + auto n = all_caches_.erase(id); + if (n == 0) { + // It has been destroyed by another duplicate request. 
+ MS_LOG(INFO) << "Duplicate request for " + std::to_string(id) + " to create cache service"; + } + base_rq->rc_ = Status::OK(); + } else { + // it is already destroyed. Ignore it. + base_rq->rc_ = Status::OK(); + } + break; + } + case BaseRequest::RequestType::kGetStat: { + if (cs == nullptr) { + std::string errMsg = "Session " + std::to_string(base_rq->connection_id_) + " not found"; + base_rq->rc_ = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, errMsg); + } else { + auto *rq = reinterpret_cast(base_rq); + CacheService::ServiceStat svc_stat; + rq->rc_ = cs->GetStat(&svc_stat); + if (rq->rc_.IsOk()) { + flatbuffers::FlatBufferBuilder fbb; + ServiceStatMsgBuilder bld(fbb); + bld.add_num_disk_cached(svc_stat.stat_.num_disk_cached); + bld.add_num_mem_cached(svc_stat.stat_.num_mem_cached); + bld.add_max_row_id(svc_stat.max_); + bld.add_min_row_id(svc_stat.min_); + bld.add_state(svc_stat.state_); + auto offset = bld.Finish(); + fbb.Finish(offset); + rq->rc_ = rq->mem_.allocate(fbb.GetSize()); + if (rq->rc_.IsOk()) { + WritableSlice dest(rq->mem_.GetMutablePointer(), fbb.GetSize()); + ReadableSlice src(fbb.GetBufferPointer(), fbb.GetSize()); + RETURN_IF_NOT_OK(WritableSlice::Copy(&dest, src)); + } + } + } + break; + } + case BaseRequest::RequestType::kCacheSchema: { + if (cs == nullptr) { + std::string errMsg = "Session " + std::to_string(base_rq->connection_id_) + " not found"; + base_rq->rc_ = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, errMsg); + } else { + auto *rq = reinterpret_cast(base_rq); + rq->rc_ = cs->CacheSchema(rq->buf_, rq->len_of_buf_); + } + break; + } + case BaseRequest::RequestType::kFetchSchema: { + if (cs == nullptr) { + std::string errMsg = "Session " + std::to_string(base_rq->connection_id_) + " not found"; + base_rq->rc_ = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, errMsg); + } else { + auto *rq = reinterpret_cast(base_rq); + rq->rc_ = cs->FetchSchema(&rq->mem_); + } + break; + } + case 
BaseRequest::RequestType::kBuildPhaseDone: { + if (cs == nullptr) { + std::string errMsg = "Session " + std::to_string(base_rq->connection_id_) + " not found"; + base_rq->rc_ = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, errMsg); + } else { + auto *rq = reinterpret_cast(base_rq); + // We can only allow to switch phase is the cookie match. + if (rq->cookie_ == cs->cookie()) { + rq->rc_ = cs->BuildPhaseDone(); + } else { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Cookie mismatch"); + } + } + break; + } + default: + base_rq->rc_ = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Unknown request type"); + } + // Notify it is done, and move on to the next request. + base_rq->wp_.Set(); + } + return Status::OK(); +} +CacheServer::CacheServer(const std::string &spill_path, int32_t num_workers) + : top_(spill_path), num_workers_(num_workers) {} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_server.h b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_server.h new file mode 100644 index 00000000000..13b68c43890 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_server.h @@ -0,0 +1,98 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +#ifndef DATASET_ENGINE_CACHE_SERVER_H_ +#define DATASET_ENGINE_CACHE_SERVER_H_ + +#include +#include +#include +#include +#include +#include +#include +#include "minddata/dataset/engine/cache/cache_service.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/util/arena.h" +#include "minddata/dataset/util/cache_pool.h" +#include "minddata/dataset/util/lock.h" +#include "minddata/dataset/util/service.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/system_pool.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/task_manager.h" + +namespace mindspore { +namespace dataset { +class BaseRequest; +/// \brief A server which provides CacheService services. +class CacheServer : public Service { + public: + friend class Services; + using cache_index = std::map>; + + CacheServer(const CacheServer &) = delete; + CacheServer &operator=(const CacheServer &) = delete; + CacheServer(CacheServer &&) = delete; + CacheServer &operator=(CacheServer &) = delete; + static CacheServer &GetInstance() noexcept { return Services::getCacheServer(); } + Status DoServiceStart() override; + Status DoServiceStop() override; + ~CacheServer() { (void)ServiceStop(); } + + /// \brief For the current demonstration, a cache client contacts cache server using a Queue. + /// \param rq + /// \return Status object + Status PushRequest(BaseRequest *rq) { + RETURN_UNEXPECTED_IF_NULL(rq); + RETURN_IF_NOT_OK(cache_q_->Add(rq)); + return Status::OK(); + } + + private: + mutable RWLock rwLock_; + std::string top_; + cache_index all_caches_; + std::shared_ptr> cache_q_; + TaskGroup vg_; + int32_t num_workers_; + + /// \brief Constructor + /// \param spill_path Top directory for spilling buffers to. + /// \param num_workers Number of threads for handling requests. + explicit CacheServer(const std::string &spill_path, int32_t num_workers = 3); + + /// \brief Locate a cache service from connection id. 
+ /// \return Pointer to cache service. Null if not found + CacheService *GetService(connection_id_type id) const; + + /// \brief Create a cache service. We allow multiple clients to create the same cache service. + /// Subsequent duplicate requests are ignored. The first cache client to create the service will be given + /// a special unique cookie. + /// \param[in] connection_id This is from a Cache client. + /// \param[in] cache_mem_sz + /// \param[in] flag + /// \param[out] out_cookie Only the first cache client will be given a special cookie to identify the creator + /// \return Status object + Status CreateService(connection_id_type connection_id, uint64_t cache_mem_sz, BaseRequest::CreateCacheFlag flag, + std::string *out_cookie); + + /// \brief Entry point for all server threads. + Status ServerRequest(); +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_CORE_CACHE_TENSOR_H_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_service.cc b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_service.cc new file mode 100644 index 00000000000..4e1208d173f --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_service.cc @@ -0,0 +1,265 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ +#include "minddata/dataset/engine/cache/cache_service.h" +#include "minddata/dataset/util/slice.h" + +namespace mindspore { +namespace dataset { +CacheService::CacheService(uint64_t mem_sz, const std::string &root, bool generate_id) + : root_(root), + cache_mem_sz_(mem_sz), + cp_(nullptr), + map_(nullptr), + next_id_(0), + generate_id_(generate_id), + schema_key_(-1), + st_(generate_id ? State::kBuildPhase : State::kNone) {} +CacheService::~CacheService() { (void)ServiceStop(); } +bool CacheService::UseArena() { + // If fixed size, use Arena instead of the pool from global context. + return (cache_mem_sz_ > 0); +} +Status CacheService::DoServiceStart() { + std::shared_ptr mp_; + if (UseArena()) { + // Create a fixed size arena based on the parameter. + std::shared_ptr arena; + RETURN_IF_NOT_OK(Arena::CreateArena(&arena, cache_mem_sz_)); + mp_ = std::move(arena); + } else { + // Unlimited size. Simply use a system pool. Another choice is CircularPool. + mp_ = std::make_shared(); + } + // Put together a CachePool for backing up the Tensor + cp_ = std::make_shared(CachePool::value_allocator(mp_), root_); + RETURN_IF_NOT_OK(cp_->ServiceStart()); + // Set up the B+ tree as well. But use the system pool instead. + map_ = std::make_shared(); + // Assign a name to this cache. Used for exclusive connection. But we can just use CachePool's name. + cookie_ = cp_->MyName(); + return Status::OK(); +} +Status CacheService::DoServiceStop() { + if (cp_ != nullptr) { + RETURN_IF_NOT_OK(cp_->ServiceStop()); + } + return Status::OK(); +} +Status CacheService::CacheRow(const std::vector &buf, row_id_type *row_id_generated) { + SharedLock rw(&rw_lock_); + RETURN_UNEXPECTED_IF_NULL(row_id_generated); + if (st_ == State::kFetchPhase) { + // For this kind of cache service, once we are done with the build phase into fetch phase, we can't + // allow other to cache more rows. 
+ RETURN_STATUS_UNEXPECTED("Can't accept cache request in fetch phase"); + } + try { + // The first buffer is a flatbuffer which describes the rest of the buffers follow + auto fb = buf.front(); + RETURN_UNEXPECTED_IF_NULL(fb); + auto msg = GetTensorRowHeaderMsg(fb); + // If the server side is designed to ignore incoming row id, we generate row id. + if (generate_id_) { + *row_id_generated = GetNextRowId(); + // Some debug information on how many rows we have generated so far. + if ((*row_id_generated) % 1000 == 0) { + MS_LOG(DEBUG) << "Number of rows cached: " << *row_id_generated; + } + } else { + if (msg->row_id() < 0) { + std::string errMsg = "Expect positive row id: " + std::to_string(msg->row_id()); + RETURN_STATUS_UNEXPECTED(errMsg); + } + *row_id_generated = msg->row_id(); + } + auto size_of_this = msg->size_of_this(); + auto column_hdr = msg->column(); + // Number of tensor buffer should match the number of columns plus one. + if (buf.size() != column_hdr->size() + 1) { + std::string errMsg = "Column count does not match. Expect " + std::to_string(column_hdr->size() + 1) + + " but get " + std::to_string(buf.size()); + RETURN_STATUS_UNEXPECTED(errMsg); + } + // Next we store in either memory or on disk. Low level code will consolidate everything in one piece. + std::vector all_data; + all_data.reserve(column_hdr->size() + 1); + all_data.emplace_back(fb, size_of_this); + for (auto i = 0; i < column_hdr->size(); ++i) { + all_data.emplace_back(buf.at(i + 1), msg->data_sz()->Get(i)); + } + // Now we cache the flat buffer. 
+ CachePool::key_type key; + RETURN_IF_NOT_OK(cp_->Insert(all_data, &key)); + Status rc = map_->DoInsert(*row_id_generated, key); + if (rc == Status(StatusCode::kDuplicateKey)) { + MS_LOG(DEBUG) << "Ignoring duplicate key."; + } else { + RETURN_IF_NOT_OK(rc); + } + return Status::OK(); + } catch (const std::exception &e) { + RETURN_STATUS_UNEXPECTED(e.what()); + } +} +std::ostream &operator<<(std::ostream &out, const CacheService &cs) { + // Then show any custom derived-internal stuff + out << "\nCache memory size: " << cs.cache_mem_sz_; + out << "\nSpill path: "; + if (cs.root_.empty()) { + out << "None"; + } else { + out << cs.GetSpillPath(); + } + return out; +} +Path CacheService::GetSpillPath() const { return cp_->GetSpillPath(); } +Status CacheService::Purge() { + // First we must lock exclusively. No one else can cache/restore anything. + UniqueLock rw(&rw_lock_); + RETURN_IF_NOT_OK(cp_->ServiceStop()); + auto new_map = std::make_shared(); + map_.reset(); + map_ = std::move(new_map); + next_id_ = 0; + RETURN_IF_NOT_OK(cp_->ServiceStart()); + return Status::OK(); +} +Status CacheService::GetStat(CacheService::ServiceStat *out) { + SharedLock rw(&rw_lock_); + RETURN_UNEXPECTED_IF_NULL(out); + if (st_ == State::kNone || st_ == State::kFetchPhase) { + out->stat_ = cp_->GetStat(); + out->state_ = static_cast(st_); + auto it = map_->begin(); + if (it != map_->end()) { + out->min_ = it.key(); + auto end_it = map_->end(); + --end_it; + out->max_ = end_it.key(); + } + } else { + out->state_ = static_cast(st_); + } + return Status::OK(); +} +Status CacheService::BatchFetch(const std::vector &v, MemGuard *out) const { + RETURN_UNEXPECTED_IF_NULL(out); + SharedLock rw(&rw_lock_); + if (st_ == State::kBuildPhase) { + // For this kind of cache service, we can't fetch yet until we are done with caching all the rows. 
+ RETURN_STATUS_UNEXPECTED("Can't accept cache request in fetch phase"); + } + const auto num_elements = v.size(); + int64_t mem_sz = (num_elements + 1) * sizeof(int64_t); + int64_t data_offset = mem_sz; + std::vector sz_v; + std::vector keys; + sz_v.reserve(num_elements); + keys.reserve(num_elements); + for (auto row_id : v) { + auto r = map_->Search(row_id); + if (r.second) { + auto &it = r.first; + CachePool::key_type key = it.value(); + auto sz = cp_->GetSize(key); + if (sz == 0) { + std::string errMsg = "Key not found: "; + errMsg += std::to_string(key); + RETURN_STATUS_UNEXPECTED(errMsg); + } + keys.push_back(key); + sz_v.push_back(sz); + mem_sz += sz; + } else { + keys.push_back(-1); + sz_v.push_back(0); + } + } + MemGuard mem; + RETURN_IF_NOT_OK(mem.allocate(mem_sz)); + auto *offset_array = reinterpret_cast(mem.GetMutablePointer()); + offset_array[0] = data_offset; + WritableSlice all(mem.GetMutablePointer(), mem.GetSizeInBytes()); + for (auto i = 0; i < num_elements; ++i) { + auto sz = sz_v.at(i); + offset_array[i + 1] = offset_array[i] + sz; + if (sz > 0) { + WritableSlice row_data(all, offset_array[i], sz); + auto key = keys.at(i); + size_t bytesRead = 0; + RETURN_IF_NOT_OK(cp_->Read(key, &row_data, &bytesRead)); + if (bytesRead != sz) { + MS_LOG(ERROR) << "Unexpected length. Read " << bytesRead << ". Expected " << sz << "." + << " Internal key: " << key << "\n"; + RETURN_STATUS_UNEXPECTED("Length mismatch. See log file for details."); + } + } + } + *out = std::move(mem); + return Status::OK(); +} +Status CacheService::CacheSchema(const void *buf, int64_t len) { + SharedLock rw(&rw_lock_); + if (st_ == State::kFetchPhase) { + // For this kind of cache service, once we are done with the build phase into fetch phase, we can't + // allow other to cache more rows. + RETURN_STATUS_UNEXPECTED("Can't accept cache request in fetch phase"); + } + // This is a special request and we need to remember where we store it. 
+ // In case we are calling the same function from multiple threads, only + // the first one is considered. Rest is ignored. + CachePool::key_type cur_key = schema_key_; + CachePool::key_type key; + if (cur_key < 0) { + RETURN_IF_NOT_OK(cp_->Insert({ReadableSlice(buf, len)}, &key)); + auto result = std::atomic_compare_exchange_strong(&schema_key_, &cur_key, key); + MS_LOG(DEBUG) << "Caching Schema. Result = " << result; + } else { + MS_LOG(DEBUG) << "Caching Schema already done"; + } + return Status::OK(); +} +Status CacheService::FetchSchema(MemGuard *out) const { + SharedLock rw(&rw_lock_); + if (st_ == State::kBuildPhase) { + // For this kind of cache service, we can't fetch yet until we are done with caching all the rows. + RETURN_STATUS_UNEXPECTED("Can't accept cache request in fetch phase"); + } + RETURN_UNEXPECTED_IF_NULL(out); + MemGuard mem; + if (schema_key_ >= 0) { + auto len = cp_->GetSize(schema_key_); + RETURN_IF_NOT_OK(mem.allocate(len)); + auto slice = WritableSlice(mem.GetMutablePointer(), len); + RETURN_IF_NOT_OK(cp_->Read(schema_key_, &slice)); + *out = std::move(mem); + } else { + return Status(StatusCode::kFileNotExist, __LINE__, __FILE__, "No schema has been cached"); + } + return Status::OK(); +} +Status CacheService::BuildPhaseDone() { + if (HasBuildPhase()) { + // Exclusive lock to switch phase + UniqueLock rw(&rw_lock_); + st_ = State::kFetchPhase; + return Status::OK(); + } else { + RETURN_STATUS_UNEXPECTED("Not a cache that has a build phase"); + } +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_service.h b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_service.h new file mode 100644 index 00000000000..bf324e82e34 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_service.h @@ -0,0 +1,143 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file 
except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +#ifndef DATASET_ENGINE_CACHE_SERVICE_H_ +#define DATASET_ENGINE_CACHE_SERVICE_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "./de_tensor_generated.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/cache/cache_request.h" +#include "minddata/dataset/util/arena.h" +#include "minddata/dataset/util/btree.h" +#include "minddata/dataset/util/cache_pool.h" +#include "minddata/dataset/util/service.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/system_pool.h" + +namespace mindspore { +namespace dataset { +struct CacheStat; +/// \brief A cache service for storing/fetching buffers to in memory cache and may spill to disk the cache service is +/// created to support spilling +class CacheService : public Service { + public: + friend class CacheServer; + using row_map = BPlusTree; + + enum class State : uint8_t { kNone = 0, kBuildPhase, kFetchPhase }; + + /// \brief Constructor + /// \param mem_sz Memory size to be set aside for the in memory cache. 0 means unlimited + /// \param root Spill path. Empty string means no spilling + /// \param generate_id If the cache service should generate row id for buffer that is cached. + /// For non-mappable dataset, this should be set to true. + CacheService(uint64_t mem_sz, const std::string &root, bool generate_id); + ~CacheService(); + + /// \brief For fixed size memory, we will create an Arena. 
+ /// \return false if unlimited memory. + bool UseArena(); + + Status DoServiceStart() override; + Status DoServiceStop() override; + + /// \brief Main function to cache a row which is in form a series of buffers. + /// The first buffer is a Google flatbuffer which describes the rest of the buffers followed. + /// \param[in] buf Vector of buffer + /// \param[out] row_id_generated The row id assigned to this row if any + /// \return Status object + Status CacheRow(const std::vector &buf, row_id_type *row_id_generated); + /// \brief Main function to fetch rows in batch. The output is a contiguous memory which will be decoded + /// by the CacheClient. Cache miss is not an error, and will be coded in the output to mark an empty row. + /// \param[in] v A vector of row id. + /// \param[out] out A contiguous memory buffer that holds the requested rows. + /// \return Status object + Status BatchFetch(const std::vector &v, MemGuard *out) const; + + /// \brief Getter function + /// \return Spilling path + Path GetSpillPath() const; + /// \brief A structure returned from the cache server for statistics request. + class ServiceStat { + public: + using state_type = std::underlying_type::type; + ServiceStat() : min_(0), max_(0), state_(0) {} + CachePool::CacheStat stat_{}; + row_id_type min_; + row_id_type max_; + state_type state_; + }; + /// \brief Statistics for the current service + /// \param[in/out] A pointer to a pre-allocated ServiceStat structure + /// \return Status Object + Status GetStat(ServiceStat *); + /// \brief Cache schema + /// \param buf A Google Flatbuffer that contains the schema + /// \param len size of the buffer + /// \return Status object + Status CacheSchema(const void *buf, int64_t len); + /// \brief Fetch schema + /// \param out A contiguous memory that contains the serialized form of schema. 
+ /// \return Status object + Status FetchSchema(MemGuard *out) const; + /// \brief Purge the content of a cache + /// \return Status object + Status Purge(); + /// \brief Overload the << operator to print a cache service + /// \param out std::ostream + /// \param cs A cache service + /// \return std::ostream + friend std::ostream &operator<<(std::ostream &out, const CacheService &cs); + /// \brief Every cache service has a cookie. If the cookie of a CacheClient matches this cookie, this CacheClient + /// is the creator + /// \return Cookie + std::string cookie() const { return cookie_; } + /// \brief If this cache service generates row id for buffer cached, it is divided into two phases, a build phase and + /// a read phase. + /// \return True if has two phases. + bool HasBuildPhase() const { return generate_id_; } + /// \brief Change from write phase to read phase. Only the creator of this service is allowed to make this call. + /// \return Status object + Status BuildPhaseDone(); + + private: + mutable RWLock rw_lock_; + std::string root_; + uint64_t cache_mem_sz_; + std::shared_ptr cp_; + std::shared_ptr map_; + std::atomic next_id_; + bool generate_id_; + std::atomic schema_key_; + std::string cookie_; + State st_; + + /// \brief Private function to generate a row id + /// \return Row id assigned. + row_id_type GetNextRowId() { return next_id_.fetch_add(1); } +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_ENGINE_CACHE_SERVICE_H_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/de_tensor.fbs b/mindspore/ccsrc/minddata/dataset/engine/cache/de_tensor.fbs new file mode 100644 index 00000000000..de26069f233 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/de_tensor.fbs @@ -0,0 +1,81 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +namespace mindspore.dataset; + +/// Type of a Tensor +enum TensorType : byte { + DE_UNKNOWN = 0, + DE_BOOL = 1, + DE_INT8 = 2, + DE_UINT8 = 3, + DE_INT16 = 4, + DE_UINT16 = 5, + DE_INT32 = 6, + DE_UINT32 = 7, + DE_INT64 = 8, + DE_UINT64 = 9, + DE_FLOAT16 = 10, + DE_FLOAT32 = 11, + DE_FLOAT64 = 12, + DE_STRING = 13 +} + +/// The meta information of a Tensor +/// \note Only the type and shape are considered meta information. Tensor data is excluded. +table TensorMetaMsg { + dims:[int64] (required); + type:TensorType; +} + +/// This is the first buffer that is sent to a Cache server when a TensorRow is serialized. +/// \param row_id is the row id of the TensorRow. 
+/// \param column The meta information of each Tensor in the row +/// \param size of this serialized buffer +/// \param size of each tensor data buffer that follows +table TensorRowHeaderMsg { + row_id:int64; + column:[TensorMetaMsg] (required); + size_of_this:int64; + data_sz:[int64] (required); +} + +root_type TensorRowHeaderMsg; + +/// A row of row id's +table TensorRowIds { + row_id:[int64] (required); +} + +/// Statistics returned from each cache service +/// \note It must match CacheService::ServiceStat +table ServiceStatMsg { + num_mem_cached:int64; + num_disk_cached:int64; + min_row_id:int64; + max_row_id:int64; + state:int8; +} + +/// Column description of each column in a schema +table ColumnNameMsg { + name:string; + id:int32; +} + +/// Serialized form of a schema +table SchemaMsg { + column:[ColumnNameMsg]; +} diff --git a/mindspore/ccsrc/dataset/engine/connector.h b/mindspore/ccsrc/minddata/dataset/engine/connector.h similarity index 97% rename from mindspore/ccsrc/dataset/engine/connector.h rename to mindspore/ccsrc/minddata/dataset/engine/connector.h index bd66172be58..a91d8e68e9c 100644 --- a/mindspore/ccsrc/dataset/engine/connector.h +++ b/mindspore/ccsrc/minddata/dataset/engine/connector.h @@ -20,10 +20,10 @@ #include #include #include -#include "dataset/util/task_manager.h" -#include "dataset/util/queue.h" -#include "dataset/util/services.h" -#include "dataset/util/cond_var.h" +#include "minddata/dataset/util/task_manager.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/cond_var.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/data_buffer.cc b/mindspore/ccsrc/minddata/dataset/engine/data_buffer.cc similarity index 82% rename from mindspore/ccsrc/dataset/engine/data_buffer.cc rename to mindspore/ccsrc/minddata/dataset/engine/data_buffer.cc index 32a70c259f3..b36aae68374 100644 --- a/mindspore/ccsrc/dataset/engine/data_buffer.cc +++ 
b/mindspore/ccsrc/minddata/dataset/engine/data_buffer.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/data_buffer.h" -#include "dataset/util/allocator.h" -#include "dataset/core/global_context.h" -#include "dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/core/tensor.h" namespace mindspore { namespace dataset { @@ -24,10 +24,8 @@ namespace dataset { // Description: This is the main constructor that is used for making a buffer DataBuffer::DataBuffer(int32_t id, BufferFlags flags) : buffer_id_(id), tensor_table_(nullptr), buffer_flags_(flags) {} -// Name: print() -// Description: A function that prints info about the DataBuffer (base class version) -void DataBuffer::Print(std::ostream &out, // In: The output stream to print to - bool show_all) const { // In: T/F if it should show everything +// A method for debug printing of the buffer +void DataBuffer::Print(std::ostream &out, bool show_all) const { out << "bufferId: " << buffer_id_ << "\nflags: " << std::hex << buffer_flags_ << std::dec << "\n"; // If the column counts are set then it means that data has been set into @@ -46,11 +44,6 @@ void DataBuffer::Print(std::ostream &out, // In: The output stream to print } } -Status DataBuffer::Load() { - std::string err_msg = "Base class load called, but it does not have an implementation!"; - RETURN_STATUS_UNEXPECTED(err_msg); -} - // Remove me!! 
Callers should fetch rows via pop Status DataBuffer::GetTensor(std::shared_ptr *ptr, int32_t row_id, int32_t col_id) const { if (row_id < tensor_table_->size() && col_id < tensor_table_->at(row_id).size()) { @@ -92,8 +85,5 @@ Status DataBuffer::SliceOff(int64_t number_of_rows) { return Status::OK(); } - -// Destructor -DataBuffer::~DataBuffer() {} } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/data_buffer.h b/mindspore/ccsrc/minddata/dataset/engine/data_buffer.h similarity index 77% rename from mindspore/ccsrc/dataset/engine/data_buffer.h rename to mindspore/ccsrc/minddata/dataset/engine/data_buffer.h index 2ab07835190..5fcb4c21a50 100644 --- a/mindspore/ccsrc/dataset/engine/data_buffer.h +++ b/mindspore/ccsrc/minddata/dataset/engine/data_buffer.h @@ -21,19 +21,17 @@ #include #include #include -#include "dataset/util/allocator.h" -#include "dataset/util/status.h" -#include "dataset/core/constants.h" -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_row.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_row.h" namespace mindspore { namespace dataset { -// The DataBuffer class is a base class that will represent the data for n values based -// on a unique row id for each row of data. -// There can be different types of DataBuffers to abstract over how the data is stored -// in memory and acquired from storage. -// Each buffer holds a range of consecutive row id's. +/// \brief The DataBuffer class is a container of tensor data and is the unit of transmission between +/// connectors of dataset operators. Inside the buffer, tensors are organized into a table-like format +/// where n TensorRows may consist of m tensors (columns). 
class DataBuffer { public: // Buffer flags @@ -47,13 +45,13 @@ class DataBuffer { // Description: This is the main constructor that is used for making a buffer DataBuffer(int32_t id, BufferFlags flags); - // Destructor - virtual ~DataBuffer(); + /// \brief default destructor + ~DataBuffer() = default; - // Name: print() - // Description: A function that prints info about the DataBuffer (base class version) - virtual void Print(std::ostream &out, // In: The output stream to print to - bool show_all) const; // In: T/F if it should show everything + /// \brief A method for debug printing of the buffer + /// \param[inout] out The stream to write to + /// \param[in] show_all A boolean to toggle between details and summary printing + void Print(std::ostream &out, bool show_all) const; // Provide stream operator for displaying it friend std::ostream &operator<<(std::ostream &out, const DataBuffer &cb) { @@ -61,10 +59,6 @@ class DataBuffer { return out; } - // Name: load() - // Description: populates the DataBuffer with data based on it's id - virtual Status Load(); - // Convenience getter functions for flag checking bool eof() const { return (static_cast(buffer_flags_) & static_cast(kDeBFlagEOF)); } diff --git a/mindspore/ccsrc/dataset/engine/data_schema.cc b/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc similarity index 99% rename from mindspore/ccsrc/dataset/engine/data_schema.cc rename to mindspore/ccsrc/minddata/dataset/engine/data_schema.cc index 6c5f882bed2..50d910251d7 100644 --- a/mindspore/ccsrc/dataset/engine/data_schema.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/data_schema.h" +#include "minddata/dataset/engine/data_schema.h" #include #include @@ -24,8 +24,8 @@ #include #include "common/utils.h" -#include "dataset/util/status.h" -#include "dataset/core/tensor_shape.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/core/tensor_shape.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/engine/data_schema.h b/mindspore/ccsrc/minddata/dataset/engine/data_schema.h similarity index 98% rename from mindspore/ccsrc/dataset/engine/data_schema.h rename to mindspore/ccsrc/minddata/dataset/engine/data_schema.h index ce61b8952da..96f6f2b118d 100644 --- a/mindspore/ccsrc/dataset/engine/data_schema.h +++ b/mindspore/ccsrc/minddata/dataset/engine/data_schema.h @@ -23,10 +23,10 @@ #include #include #include -#include "dataset/core/constants.h" -#include "dataset/core/data_type.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/dataset_iterator.cc b/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/dataset_iterator.cc rename to mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.cc index be333741b1c..f75ca5d0976 100644 --- a/mindspore/ccsrc/dataset/engine/dataset_iterator.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.cc @@ -13,16 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/dataset_iterator.h" +#include "minddata/dataset/engine/dataset_iterator.h" #include #include -#include "dataset/core/data_type.h" -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/util/status.h" -#include "dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/dataset_iterator.h b/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/dataset_iterator.h rename to mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.h index 4e40e77c747..253d1604e20 100644 --- a/mindspore/ccsrc/dataset/engine/dataset_iterator.h +++ b/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.h @@ -20,11 +20,11 @@ #include #include #include -#include "dataset/util/status.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/datasetops/dataset_op.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/perf/dataset_iterator_tracing.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/perf/dataset_iterator_tracing.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/datasetops/CMakeLists.txt similarity index 51% rename from 
mindspore/ccsrc/dataset/engine/datasetops/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/CMakeLists.txt index ed574210304..a2cd6dc07a6 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/CMakeLists.txt @@ -2,13 +2,12 @@ add_subdirectory(source) file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) -add_library(engine-datasetops OBJECT + +set(DATASET_ENGINE_DATASETOPS_SRC_FILES dataset_op.cc parallel_op.cc pipeline_op.cc - barrier_op.cc batch_op.cc - bucket_batch_by_length_op.cc device_queue_op.cc map_op.cc project_op.cc @@ -19,7 +18,21 @@ add_library(engine-datasetops OBJECT shuffle_op.cc zip_op.cc concat_op.cc - filter_op.cc - build_vocab_op.cc + cache_base_op.cc + cache_lookup_op.cc + cache_op.cc + cache_merge_op.cc ) +if (ENABLE_PYTHON) + set(DATASET_ENGINE_DATASETOPS_SRC_FILES + ${DATASET_ENGINE_DATASETOPS_SRC_FILES} + bucket_batch_by_length_op.cc + barrier_op.cc + filter_op.cc + build_vocab_op.cc + ) +endif() + +add_library(engine-datasetops OBJECT ${DATASET_ENGINE_DATASETOPS_SRC_FILES}) + diff --git a/mindspore/ccsrc/dataset/engine/datasetops/barrier_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/barrier_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc index 6fc276a75e9..51ea232e68a 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/barrier_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/barrier_op.h" +#include "minddata/dataset/engine/datasetops/barrier_op.h" #include #include -#include "dataset/core/constants.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/db_connector.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/global_context.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/global_context.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/barrier_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/barrier_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.h index 379b8f146b2..a3ac8432726 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/barrier_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.h @@ -20,10 +20,10 @@ #include #include #include -#include "dataset/core/tensor.h" -#include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/datasetops/pipeline_op.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc similarity index 93% rename from mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc index 8bfa8c287c0..844d0543074 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc +++ 
b/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc @@ -13,17 +13,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/batch_op.h" +#include "minddata/dataset/engine/datasetops/batch_op.h" #include #include #include "common/utils.h" -#include "dataset/core/pybind_support.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/opt/pass.h" -#include "dataset/kernels/data/data_utils.h" +#ifdef ENABLE_PYTHON +#include "minddata/dataset/core/pybind_support.h" +#endif +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/kernels/data/data_utils.h" using float16 = Eigen::half; @@ -38,9 +40,14 @@ BatchOp::Builder::Builder(int32_t batch_size) : builder_drop_(false), builder_pa Status BatchOp::Builder::Build(std::shared_ptr *ptr) { RETURN_IF_NOT_OK(SanityCheck()); +#ifdef ENABLE_PYTHON *ptr = std::make_shared(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_, builder_num_workers_, builder_cols_to_map_, builder_batch_size_func_, builder_batch_map_func_, builder_pad_map_); +#else + *ptr = std::make_shared(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_, + builder_num_workers_, builder_cols_to_map_, builder_pad_map_); +#endif return Status::OK(); } @@ -52,6 +59,7 @@ Status BatchOp::Builder::SanityCheck() { return err.empty() ? 
Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, common::SafeCStr(err)); } +#ifdef ENABLE_PYTHON BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, const std::vector &cols_to_map, py::function batch_size_func, py::function batch_map_func, PadInfo pad_map) @@ -65,6 +73,18 @@ BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, pad_info_(pad_map) { worker_queues_.Init(num_workers, op_queue_size); } +#else +BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, + const std::vector &cols_to_map, PadInfo pad_map) + : ParallelOp(num_workers, op_queue_size), + start_batch_size_(batch_size), + drop_(drop), + pad_(pad), + pyfunc_column_names_(cols_to_map), + pad_info_(pad_map) { + worker_queues_.Init(num_workers, op_queue_size); +} +#endif Status BatchOp::operator()() { Status rc = LaunchThreadsAndInitOp(); @@ -206,7 +226,9 @@ Status BatchOp::WorkerEntry(int32_t workerId) { Status BatchOp::MakeBatchedBuffer(std::pair, CBatchInfo> table_pair, std::unique_ptr *db) { RETURN_UNEXPECTED_IF_NULL(table_pair.first); - if (!pyfunc_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc +#ifdef ENABLE_PYTHON + if (!pyfunc_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc +#endif if (pad_) RETURN_IF_NOT_OK(PadColumns(&table_pair.first, pad_info_, column_name_id_map_)); // do padding if needed (*db) = std::make_unique(table_pair.second.batch_num_, DataBuffer::kDeBFlagNone); std::unique_ptr dest_table = std::make_unique(); @@ -229,6 +251,7 @@ Status BatchOp::EoeReceived(int32_t) { return Status::OK(); } +#ifdef ENABLE_PYTHON Status BatchOp::MapColumns(std::pair, CBatchInfo> *table_pair) { TensorBatchTable input_table; input_table.reserve(pyfunc_column_names_.size()); @@ -259,16 +282,22 @@ Status BatchOp::MapColumns(std::pair, CBatchInfo> } return Status::OK(); } 
+#endif Status BatchOp::GetBatchSize(int32_t *batch_size, CBatchInfo info) { +#ifdef ENABLE_PYTHON if (batch_size_func_ != nullptr) { RETURN_IF_NOT_OK(InvokeBatchSizeFunc(batch_size, info)); } else { (*batch_size) = start_batch_size_; } +#else + (*batch_size) = start_batch_size_; +#endif return Status::OK(); } +#ifdef ENABLE_PYTHON Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) { { // Acquire Python GIL @@ -336,6 +365,7 @@ Status BatchOp::InvokeBatchMapFunc(TensorBatchTable *input, TensorBatchTable *ou } return Status(StatusCode::kOK); } +#endif Status BatchOp::PadColumns(std::unique_ptr *table, const PadInfo &pad_info, const std::unordered_map &column_name_id_map) { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.h similarity index 95% rename from mindspore/ccsrc/dataset/engine/datasetops/batch_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.h index 28df5e7e817..0c042433f75 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.h @@ -26,11 +26,11 @@ #include #include -#include "dataset/core/config_manager.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -89,6 +89,7 @@ class BatchOp : public ParallelOp { return *this; } +#ifdef ENABLE_PYTHON // set columns to perform map on // @param const std::vector & cols_to_map - name of columns to perform map on // @return Builder & reference to builder class object @@ -104,6 +105,7 @@ class BatchOp : public 
ParallelOp { builder_batch_size_func_ = batch_size_func; return *this; } +#endif // @param std::shared_ptr *ptr pointer to shared_ptr, actual return arg // @return Status - The error code return @@ -121,8 +123,10 @@ class BatchOp : public ParallelOp { int32_t builder_op_connector_size_; std::vector builder_cols_to_map_; PadInfo builder_pad_map_; +#ifdef ENABLE_PYTHON py::function builder_batch_size_func_; py::function builder_batch_map_func_; +#endif }; enum batchCtrl : int8_t { kNoCtrl = 0, kEOE = 1, kEOF = 2, kQuit = 3 }; @@ -144,6 +148,7 @@ class BatchOp : public ParallelOp { const int64_t get_epoch_num() const { return epoch_num_; } }; +#ifdef ENABLE_PYTHON // BatchOp constructor // @param int32_t batch_size // @param bool drop @@ -152,6 +157,10 @@ class BatchOp : public ParallelOp { // @param int32_t num_workers BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, const std::vector &, py::function batch_size_func, py::function batch_map_func, PadInfo pad_map); +#else + BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, + const std::vector &, PadInfo pad_map); +#endif // BatchOp destructor ~BatchOp() {} @@ -219,10 +228,13 @@ class BatchOp : public ParallelOp { // @return Status - The error code return Status MakeBatchedBuffer(std::pair, CBatchInfo> table_pair, std::unique_ptr *db); + +#ifdef ENABLE_PYTHON // Function that calls pyfunc to perform map on batch // @param (std::pair, batch_stats> *table_pair - contains un-batched tensor // @return Status - The error code return Status MapColumns(std::pair, CBatchInfo> *table_pair); +#endif // @param const PadInfo &pad_info pad info to unpack // @param const std::unordered_map& column_name_id_map - column names to index mapping @@ -247,6 +259,7 @@ class BatchOp : public ParallelOp { // @return Status - The error code return Status LaunchThreadsAndInitOp(); +#ifdef ENABLE_PYTHON // Invoke batch size function with current BatchInfo to 
generate batch size. // @return Status - The error code return Status InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info); @@ -254,6 +267,7 @@ class BatchOp : public ParallelOp { // Invoke batch map function with current BatchInfo to generate tensors to batch. // @return Status - The error code return Status InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBatchInfo info); +#endif int32_t start_batch_size_; bool drop_; // bool for whether to drop remainder or not @@ -262,8 +276,10 @@ class BatchOp : public ParallelOp { PadInfo pad_info_; // column names to perform padding on std::unique_ptr child_iterator_; // child iterator for fetching TensorRows 1 by 1 QueueList, CBatchInfo>> worker_queues_; // internal queue for syncing worker +#ifdef ENABLE_PYTHON py::function batch_size_func_; // Function pointer of batch size function py::function batch_map_func_; // Function pointer of per batch map function +#endif }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.cc similarity index 94% rename from mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.cc index 5e143b700fb..138bb7980bc 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/bucket_batch_by_length_op.h" +#include "minddata/dataset/engine/datasetops/bucket_batch_by_length_op.h" #include #include @@ -24,14 +24,14 @@ #include "pybind11/numpy.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" -#include "dataset/core/pybind_support.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/engine/opt/pass.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/pybind_support.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/util/status.h" namespace py = pybind11; namespace mindspore { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.h similarity index 94% rename from mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.h index bf0bcb0e787..332ff4bb226 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.h @@ -22,12 +22,12 @@ #include #include -#include "dataset/core/config_manager.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/datasetops/batch_op.h" -#include "dataset/engine/datasetops/pipeline_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/tensor.h" +#include 
"minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/engine/datasetops/batch_op.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/build_vocab_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.cc similarity index 98% rename from mindspore/ccsrc/dataset/engine/datasetops/build_vocab_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.cc index ceb50585938..8ed51ebbb61 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/build_vocab_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.cc @@ -14,14 +14,14 @@ * limitations under the License. */ -#include "dataset/engine/datasetops/build_vocab_op.h" +#include "minddata/dataset/engine/datasetops/build_vocab_op.h" #include #include #include #include #include -#include "dataset/core/config_manager.h" +#include "minddata/dataset/core/config_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/build_vocab_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.h similarity index 95% rename from mindspore/ccsrc/dataset/engine/datasetops/build_vocab_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.h index bf358c48c69..42ea0deb5c9 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/build_vocab_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.h @@ -22,12 +22,12 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/text/vocab.h" -#include "dataset/util/queue.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include 
"minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/text/vocab.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.cc new file mode 100644 index 00000000000..1b0890686f1 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.cc @@ -0,0 +1,185 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "minddata/dataset/engine/datasetops/cache_base_op.h" +#include +#include +#include "minddata/dataset/engine/execution_tree.h" + +namespace mindspore { +namespace dataset { +// A print method typically used for debugging +void CacheBase::Print(std::ostream &out, bool show_all) const { + // Always show the id and name as first line regardless if this summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") <" << Name() << ">:"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + ParallelOp::Print(out, show_all); + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nCache client:\n" << *cache_client_ << "\n\n"; + } +} +// Overrides base class reset method. When an operator does a reset, it cleans up any state +// info from it's previous execution and then initializes itself so that it can be executed +// again. +Status CacheBase::Reset() { + if (sampler_ != nullptr) { + RETURN_IF_NOT_OK(sampler_->ResetSampler()); + } + // Wake up the workers to get them going again in a new epoch + MS_LOG(DEBUG) << Name() << " resetting."; + epoch_sync_.Set(); + return Status::OK(); +} +CacheBase::CacheBase(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf, + std::shared_ptr cache_client, std::shared_ptr sampler) + : ParallelOp(num_workers, op_connector_size, sampler), + cache_client_(cache_client), + rows_per_buffer_(rows_per_buf), + // We can cause deadlock if this internal Connector size is too small. 
+ keys_miss_(num_workers_, 1, connector_capacity_) { + io_block_queues_.Init(num_workers, op_connector_size); +} +// Common function to fetch samples from the sampler and send them using the io_block_queues to +// the parallel workers +Status CacheBase::FetchSamplesToWorkers() { + int64_t buf_cnt = 0; + int64_t wait_cnt = 0; + do { + epoch_sync_.Clear(); + std::vector keys; + int64_t row_cnt = 0; + keys.reserve(rows_per_buffer_); + std::unique_ptr sampler_buffer; + RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); + while (!sampler_buffer->eoe()) { + TensorRow sample_row; + RETURN_IF_NOT_OK(sampler_buffer->PopRow(&sample_row)); + std::shared_ptr sample_ids = sample_row[0]; + for (auto itr = sample_ids->begin(); itr != sample_ids->end(); itr++) { + keys.push_back(*itr); + ++row_cnt; + if (row_cnt % rows_per_buffer_ == 0) { + auto blk = std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)); + RETURN_IF_NOT_OK(io_block_queues_[buf_cnt++ % num_workers_]->Add(std::move(blk))); + keys.clear(); + } + } + RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); + } + if (!keys.empty()) { + auto blk = std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)); + RETURN_IF_NOT_OK(io_block_queues_[buf_cnt++ % num_workers_]->Add(std::move(blk))); + } + // send the eoe + RETURN_IF_NOT_OK( + io_block_queues_[(buf_cnt++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEoe))); + // If repeat but the not last repeat, wait for reset. + if (BitTest(op_ctrl_flags_, kDeOpRepeated) && !BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { + MS_LOG(DEBUG) << Name() << " Waiting for reset. Count " << ++wait_cnt << " Buffer sent " << buf_cnt; + RETURN_IF_NOT_OK(epoch_sync_.Wait()); + } else { + // We can break out from the loop. + break; + } + } while (true); + // Flow the eof before exit + RETURN_IF_NOT_OK( + io_block_queues_[(buf_cnt++) % num_workers_]->Add(std::make_unique(IOBlock::kDeIoBlockFlagEof))); + // Ask all the workers to quit. 
+ for (int32_t i = 0; i < num_workers_; i++) { + RETURN_IF_NOT_OK( + io_block_queues_[i]->Add(std::make_unique(std::vector(), IOBlock::kDeIoBlockNone))); + } + return Status::OK(); +} +Status CacheBase::FetchFromCache(int32_t worker_id) { + int64_t buffer_id = worker_id; + std::unique_ptr blk; + do { + RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&blk)); + if (blk->eof()) { + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOF))); + } else if (blk->eoe()) { + if (AllowCacheMiss()) { + // This code path is for CacheLookupOp acting as a sampler. If we get a eoe from + // a sampler, send a eoe to physical leaf op as well. + std::vector eoe; + eoe.push_back(eoe_row_id); + RETURN_IF_NOT_OK(keys_miss_.Push(worker_id, eoe)); + } + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique(0, DataBuffer::kDeBFlagEOE))); + } else { + std::vector keys; + RETURN_IF_NOT_OK(blk->GetKeys(&keys)); + if (keys.empty()) { + // empty key is a quit signal for workers + break; + } + std::unique_ptr db = std::make_unique(buffer_id, DataBuffer::kDeBFlagNone); + std::unique_ptr que = std::make_unique(); + TensorTable ttbl; + RETURN_IF_NOT_OK(cache_client_->GetRows(keys, &ttbl)); + auto row_it = ttbl.begin(); + std::vector cache_miss; + cache_miss.reserve(keys.size()); + for (auto row_id : keys) { + auto &row = *row_it; + if (row.empty()) { + if (AllowCacheMiss()) { + cache_miss.push_back(row_id); + } else { + std::string errMsg = "Row id " + std::to_string(row_id) + " not found."; + RETURN_STATUS_UNEXPECTED(errMsg); + } + } + que->push_back(std::move(row)); + ++row_it; + } + db->set_tensor_table(std::move(que)); + if (AllowCacheMiss()) { + // Because of the way connector works, we push unconditionally even cache_miss can be empty. 
+ RETURN_IF_NOT_OK(keys_miss_.Push(worker_id, cache_miss)); + } + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); + buffer_id += num_workers_; + } + } while (true); + return Status::OK(); +} +Status CacheBase::RegisterResources() { + RETURN_IF_NOT_OK(epoch_sync_.Register(tree_->AllTasks())); + RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); + return Status::OK(); +} +CacheBase::~CacheBase() {} +Status CacheBase::UpdateColumnMapFromCache() { + Status rc; + // Get the schema from the server. It may not be there yet. So tolerate the error. + if (column_name_id_map_.empty()) { + rc = cache_client_->FetchSchema(&column_name_id_map_); + if (rc == Status(StatusCode::kFileNotExist)) { + MS_LOG(DEBUG) << "Schema not in the server yet."; + rc = Status::OK(); + } + } + return rc; +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.h new file mode 100644 index 00000000000..fb3e999b76e --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.h @@ -0,0 +1,108 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef DATASET_ENGINE_DATASETOPS_CACHE_BASE_OP_H_ +#define DATASET_ENGINE_DATASETOPS_CACHE_BASE_OP_H_ + +#include +#include +#include +#include +#include "minddata/dataset/engine/cache/cache_client.h" +#include "minddata/dataset/engine/cache/cache_service.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/repeat_op.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/wait_post.h" +#include "minddata/dataset/engine/datasetops/cache_base_op.h" +namespace mindspore { +namespace dataset { +/// \brief This is the base class for CacheOp and CacheLookupOp which share many similarities. +/// \see CacheOp +/// \see CacheLookupOp +class CacheBase : public ParallelOp { + public: + /// \brief Base class constructor + /// \param num_workers Number of parallel workers + /// \param op_connector_size Connector size + /// \param rows_per_buf Number of rows per buffer + /// \param cache_client CacheClient for communication to the CacheServer + /// \param sampler Sampler which is mandatory + CacheBase(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf, + std::shared_ptr cache_client, std::shared_ptr sampler); + /// \brief Destructor + ~CacheBase(); + + /// \brief Overrides base class reset method. When an operator does a reset, it cleans up any state + /// info from it's previous execution and then initializes itself so that it can be executed + /// again. 
+ /// \return Status - The error code return + Status Reset() override; + + /// \brief A print method typically used for debugging + /// \param out The output stream to write output to + /// \param show_all A bool to control if you want to show all info or just a summary + void Print(std::ostream &out, bool show_all) const override; + + /// \brief << Stream output operator overload + /// \notes This allows you to write the debug print info using stream operators + /// \param out reference to the output stream being overloaded + /// \param mo reference to the CacheOp to display + /// \return the output stream must be returned + friend std::ostream &operator<<(std::ostream &out, const CacheBase &mo) { + mo.Print(out, false); + return out; + } + + /// \brief Getter for the cache client + /// \return shared ptr to the cache client + std::shared_ptr cache_client() { return cache_client_; } + /// \brief Setter for the cache client + void SetCacheClient(std::shared_ptr cache_client) { cache_client_ = std::move(cache_client); } + /// \brief Derived class must implement this method if a cache miss is treated as error + virtual bool AllowCacheMiss() = 0; + + protected: + constexpr static int32_t eoe_row_id = -1; + std::shared_ptr cache_client_; + WaitPost epoch_sync_; + int32_t rows_per_buffer_; + Connector> keys_miss_; + + /// \brief Common function to register resources for interrupt + /// \note Derived should override this function for extra resources to be registered + virtual Status RegisterResources(); + /// \brief This function is called by main thread to send samples to the worker thread. 
+ /// \note It is a non-virtual function + /// \return Status object + Status FetchSamplesToWorkers(); + /// \brief This function is called by each worker to fetch rows from the cache server for a given set of + /// sample row id's + /// \return Status object + Status FetchFromCache(int32_t worker_id); + /// \brief Get the column map from cache server + Status UpdateColumnMapFromCache(); + + private: + constexpr static int32_t connector_capacity_ = 1024; + QueueList> io_block_queues_; +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_ENGINE_DATASETOPS_CACHE_BASE_OP_H_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.cc new file mode 100644 index 00000000000..0a9b7544bae --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.cc @@ -0,0 +1,130 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "minddata/dataset/engine/datasetops/cache_lookup_op.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "utils/log_adapter.h" +#include "utils/system/crc32c.h" + +namespace mindspore { +namespace dataset { +// Builder constructor. 
Creates the builder object. +CacheLookupOp::Builder::Builder() : build_cache_client_(nullptr), build_sampler_(nullptr) { + std::shared_ptr cfg = GlobalContext::config_manager(); + build_num_workers_ = cfg->num_parallel_workers(); + rows_per_buffer_ = cfg->rows_per_buffer(); + build_op_connector_size_ = cfg->op_connector_size(); +} + +// Check if the required parameters are set by the builder. +Status CacheLookupOp::Builder::SanityCheck() const { + if (build_cache_client_ == nullptr) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "CacheLookupOp requires a CacheClient"); + } + // Make sure the cache client has a valid session + if (!build_cache_client_->session_id()) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, + "Cache client for CacheLookupOp is missing session id"); + } + return Status::OK(); +} + +// The builder "build" method creates the final object and does some init on it +Status CacheLookupOp::Builder::Build(std::shared_ptr *ptr) { + RETURN_IF_NOT_OK(SanityCheck()); + *ptr = std::make_shared(build_num_workers_, build_op_connector_size_, rows_per_buffer_, + build_cache_client_, build_sampler_); + return Status::OK(); +} +Status CacheLookupOp::operator()() { + if (!sampler_) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, + "CacheLookupOp requires a sampler before it can be executed!"); + } + RETURN_IF_NOT_OK(RegisterResources()); + // Kick off the workers + RETURN_IF_NOT_OK( + tree_->LaunchWorkers(num_workers_, std::bind(&CacheLookupOp::WorkerEntry, this, std::placeholders::_1))); + // required task group sync after launching workers + TaskManager::FindMe()->Post(); + // We have to wait until the leaf op has handshake with us. 
+ RETURN_IF_NOT_OK(leaf_op_wp_.Wait()); + RETURN_IF_NOT_OK(FetchSamplesToWorkers()); + return Status::OK(); +} +Status CacheLookupOp::WorkerEntry(int32_t worker_id) { + TaskManager::FindMe()->Post(); + RETURN_IF_NOT_OK(FetchFromCache(worker_id)); + return Status::OK(); +} +Status CacheLookupOp::ResetSampler() { return Status::OK(); } +Status CacheLookupOp::HandshakeRandomAccessOp(const RandomAccessOp *op) { + // We act like a sampler and as a dataset op. During handshake with leaf op, + // We must wait until the leaf op has indexed everything. + RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(op)); + // Now we notify the main thread handshake has finished. + leaf_op_wp_.Set(); + return Status::OK(); +} +Status CacheLookupOp::InitSampler() { return Sampler::InitSampler(); } +void CacheLookupOp::Print(std::ostream &out, bool show_all) const { CacheBase::Print(out, show_all); } +Status CacheLookupOp::GetNextSample(std::unique_ptr *out_buffer) { + std::vector cache_miss; + RETURN_IF_NOT_OK(keys_miss_.Pop(0, &cache_miss)); + // Ignore the case we have no cache miss, we can't return empty samples. 
+ while (cache_miss.empty()) { + RETURN_IF_NOT_OK(keys_miss_.Pop(0, &cache_miss)); + } + // Special code for eoe + if (cache_miss.at(0) == eoe_row_id) { + *out_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); + } else { + std::shared_ptr sample_ts; + RETURN_IF_NOT_OK(CreateSamplerTensor(&sample_ts, cache_miss.size())); + (*out_buffer) = std::make_unique(0, DataBuffer::kDeBFlagNone); + auto idPtr = sample_ts->begin(); + for (auto i = 0; i < cache_miss.size(); ++i) { + *idPtr = cache_miss.at(i); + ++idPtr; + } + TensorRow row; + row.push_back(sample_ts); + (*out_buffer)->set_tensor_table(std::make_unique(1, row)); + } + return Status::OK(); +} +Status CacheLookupOp::RegisterResources() { + RETURN_IF_NOT_OK(CacheBase::RegisterResources()); + RETURN_IF_NOT_OK(leaf_op_wp_.Register(tree_->AllTasks())); + return Status::OK(); +} +Status CacheLookupOp::ComputeColMap() { + // We don't know the column map at this point unless we contact the cache server + // to fetch the schema but the cache server may not have it at this point either. + // So we will just return OK and let MergeOp (our parent) to handle it. + return Status::OK(); +} + +// Visitor accept method for NodePass +Status CacheLookupOp::Accept(NodePass *p, bool *modified) { + // Downcast shared pointer then call visitor + return p->RunOnNode(shared_from_base(), modified); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.h new file mode 100644 index 00000000000..46a58c5d026 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.h @@ -0,0 +1,122 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_ENGINE_DATASETOPS_CACHE_LOOKUP_OP_H_ +#define DATASET_ENGINE_DATASETOPS_CACHE_LOOKUP_OP_H_ + +#include +#include +#include +#include +#include +#include "minddata/dataset/engine/datasetops/cache_base_op.h" + +namespace mindspore { +namespace dataset { +/// \brief provides a memory/disk cache that acts as a save-point within a mappable dataset. +/// \note For non-mappable dataset, please see CacheOp +/// \see CacheOp +class CacheLookupOp : public CacheBase, public Sampler { + public: + class Builder { + public: + /// \brief Builder constructor. Creates the builder object. + /// \note No default args + Builder(); + + /// Default destructor + ~Builder() = default; + + /// Setter method. + /// \treturn Builder setter method returns reference to the builder. + Builder &SetNumWorkers(int32_t num_workers) { + build_num_workers_ = num_workers; + return *this; + } + + /// Setter method. + /// \return Builder setter method returns reference to the builder. + Builder &SetOpConnectorSize(int32_t connector_size) { + build_op_connector_size_ = connector_size; + return *this; + } + + /// Setter method. + /// \return Builder setter method returns reference to the builder. + Builder &SetClient(std::shared_ptr cache_client) { + build_cache_client_ = cache_client; + return *this; + } + + /// \brief Setter method. + /// \return Builder setter method returns reference to the builder. 
+ Builder &SetSampler(std::shared_ptr sampler) { + build_sampler_ = std::move(sampler); + return *this; + } + + /// \brief The builder "build" method creates the final object and does some init on it. + /// \param ptr The shared_ptr to the new CacheLookupOp object + /// \return Status + Status Build(std::shared_ptr *ptr); + + private: + int32_t build_num_workers_; + int32_t rows_per_buffer_; + int32_t build_op_connector_size_; + std::shared_ptr build_cache_client_; + std::shared_ptr build_sampler_; + + // Check if the required parameters are set by the builder. + // \return Status The error code return + Status SanityCheck() const; + }; + /// \brief Constructor + /// \note It takes the same argument as the base class. + /// \see CacheBase + CacheLookupOp(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf, + std::shared_ptr cache_client, std::shared_ptr sampler) + : CacheBase(num_workers, op_connector_size, rows_per_buf, cache_client, sampler), Sampler(*(sampler.get())) {} + ~CacheLookupOp() = default; + // As a parallel op, we override these two functions + Status operator()() override; + Status WorkerEntry(int32_t worker_id) override; + // As a sampler, we override the following functions + Status ResetSampler() override; + Status HandshakeRandomAccessOp(const RandomAccessOp *op) override; + Status InitSampler() override; + Status GetNextSample(std::unique_ptr *out_buffer) override; + void Print(std::ostream &out, bool show_all) const override; + bool AllowCacheMiss() override { return true; } + std::string Name() const override { return "CacheLookupOp"; } + + /// \brief Base-class override for NodePass visitor acceptor + /// \param[in] p The node to visit + /// \param[out] modified Indicator if the node was modified + /// \return Status of the node visit + Status Accept(NodePass *p, bool *modified) override; + + protected: + Status ComputeColMap() override; + + private: + WaitPost leaf_op_wp_; + + Status RegisterResources() override; +}; +} // 
namespace dataset +} // namespace mindspore + +#endif // DATASET_ENGINE_DATASETOPS_CACHE_LOOKUP_OP_H_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_merge_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_merge_op.cc new file mode 100644 index 00000000000..75579dc3a62 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_merge_op.cc @@ -0,0 +1,302 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "minddata/dataset/engine/datasetops/cache_merge_op.h" + +#include +#include +#include +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/util/task_manager.h" + +namespace mindspore { +namespace dataset { +CacheMergeOp::~CacheMergeOp() = default; +void CacheMergeOp::Print(std::ostream &out, bool show_all) + const { // Always show the id and name as first line regardless if this is summary or detailed print + out << "(" << std::setw(2) << operator_id_ << ") :"; + if (!show_all) { + // Call the super class for displaying any common 1-liner info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; + } else { + // Call the super class for displaying any common detailed info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\n\n"; + } +} +CacheMergeOp::CacheMergeOp(int32_t numWorkers, int32_t opConnectorSize, int32_t numCleaners, + std::shared_ptr cache_client, const std::shared_ptr &sampler) + : ParallelOp(numWorkers, opConnectorSize, sampler), num_cleaners_(numCleaners), cache_client_(cache_client) {} +Status CacheMergeOp::operator()() { + // A queue of row id to let cleaner send cache miss rows to the cache server + // We don't want a small queue as this will block the parallel op workers. + // A row id is 8 byte integer. So bigger size doesn't consume a lot of memory. 
+ static const int32_t queue_sz = 512; + io_que_ = std::make_unique>(queue_sz); + RETURN_IF_NOT_OK(io_que_->Register(tree_->AllTasks())); + RETURN_IF_NOT_OK( + tree_->LaunchWorkers(num_workers_, std::bind(&CacheMergeOp::WorkerEntry, this, std::placeholders::_1))); + RETURN_IF_NOT_OK( + tree_->LaunchWorkers(num_workers_, std::bind(&CacheMergeOp::CacheMissWorkerEntry, this, std::placeholders::_1))); + // One dedicated thread to move TensorRow from the pool to the cache server + for (auto i = 0; i < num_cleaners_; ++i) { + RETURN_IF_NOT_OK(tree_->AllTasks()->CreateAsyncTask("Cleaner", std::bind(&CacheMergeOp::Cleaner, this))); + } + TaskManager::FindMe()->Post(); + return Status::OK(); +} +// Each parallel worker will pop from the CacheHit stream. If there is a missing TensorRow, we will wait +// until it shows up in the pool. +Status CacheMergeOp::WorkerEntry(int32_t worker_id) { + TaskManager::FindMe()->Post(); + std::shared_ptr cache_hit_stream = child_[kCacheHitChildIdx]; + std::unique_ptr db_ptr; + RETURN_IF_NOT_OK(cache_hit_stream->GetNextBuffer(&db_ptr, worker_id)); + while (!db_ptr->eof()) { + if (db_ptr->eoe()) { + RETURN_IF_NOT_OK(EoeReceived(worker_id)); + db_ptr.reset(); + RETURN_IF_NOT_OK(cache_hit_stream->GetNextBuffer(&db_ptr, worker_id)); + } else { + // See if there is any missing row + auto tbl = std::make_unique(); + while (db_ptr->NumRows() > 0) { + TensorRow row; + RETURN_IF_NOT_OK(db_ptr->PopRow(&row)); + if (row.empty()) { + auto row_id = row.getId(); + TensorRowRequest *rq = nullptr; + RETURN_IF_NOT_OK(GetRq(row_id, &rq)); + // Block until the row shows up in the pool. 
+ RETURN_IF_NOT_OK(rq->Wait(&row)); + } + tbl->push_back(std::move(row)); + } + db_ptr->set_tensor_table(std::move(tbl)); + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db_ptr))); + RETURN_IF_NOT_OK(cache_hit_stream->GetNextBuffer(&db_ptr, worker_id)); + } + } + RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db_ptr))); + return Status::OK(); +} +Status CacheMergeOp::CacheMissWorkerEntry(int32_t workerId) { + TaskManager::FindMe()->Post(); + // We will simply pop TensorRow from the stream and insert them into the pool and + // wake up any worker that is awaiting on the missing TensorRow. + // If we see an eoe, ignore it. For eof, we exit. + std::shared_ptr cache_missing_stream = child_[kCacheMissChildIdx]; + // Before we start, cache the schema at the server. Pick one of the workers + // do it. The schema should have been done at prepare time. + if (workerId == 0) { + RETURN_IF_NOT_OK(cache_client_->CacheSchema(column_name_id_map())); + } + std::unique_ptr db_ptr; + RETURN_IF_NOT_OK(cache_missing_stream->GetNextBuffer(&db_ptr, workerId)); + while (!db_ptr->eof()) { + if (db_ptr->eoe()) { + // Ignore it. + MS_LOG(DEBUG) << "Ignore eoe"; + } else { + while (db_ptr->NumRows() > 0) { + TensorRow row; + RETURN_IF_NOT_OK(db_ptr->PopRow(&row)); + row_id_type row_id = row.getId(); + if (row_id < 0) { + std::string errMsg = "Expect positive row id: " + std::to_string(row_id); + RETURN_STATUS_UNEXPECTED(errMsg); + } + TensorRowRequest *rq = nullptr; + RETURN_IF_NOT_OK(GetRq(row_id, &rq)); + rq->WakeUpAny(std::move(row)); + // Let the cleaner to flush out this row (async) to the cache server. 
+ RETURN_IF_NOT_OK(io_que_->EmplaceBack(row_id)); + } + } + RETURN_IF_NOT_OK(cache_missing_stream->GetNextBuffer(&db_ptr, workerId)); + } + return Status::OK(); +} +Status CacheMergeOp::Cleaner() { + TaskManager::FindMe()->Post(); + while (true) { + row_id_type row_id; + RETURN_IF_NOT_OK(io_que_->PopFront(&row_id)); + if (row_id < 0) { + break; + } + TensorRowRequest *rq = nullptr; + RETURN_IF_NOT_OK(GetRq(row_id, &rq)); + if (rq->GetState() == TensorRowRequest::State::kClean) { + // If already flushed, move on to the next one. + continue; + } + TensorRow row; + RETURN_IF_NOT_OK(rq->Release(&row)); + CHECK_FAIL_RETURN_UNEXPECTED(!row.empty(), "Programming error."); + Status rc = cache_client_->WriteRow(row); + // Bad rc should not bring down the pipeline + if (rc.IsError()) { + MS_LOG(WARNING) << "Cache not successful." << rc.ToString(); + } + rq->SetState(TensorRowRequest::State::kClean); + } + return Status::OK(); +} + +Status CacheMergeOp::GetRq(row_id_type row_id, CacheMergeOp::TensorRowRequest **out) { + RETURN_UNEXPECTED_IF_NULL(out); + std::unique_lock lck(mux_); + auto it = cache_miss_map_.find(row_id); + if (it != cache_miss_map_.end()) { + *out = it->second.GetMutablePointer(); + } else { + // We will create a new one. 
+ auto alloc = Services::GetAllocator(); + auto r = cache_miss_map_.emplace(row_id, MemGuard>(alloc)); + if (r.second) { + auto &mem = r.first->second; + RETURN_IF_NOT_OK(mem.allocate(1, row_id)); + *out = mem.GetMutablePointer(); + } else { + RETURN_STATUS_UNEXPECTED("Map insert fail."); + } + } + return Status::OK(); +} +Status CacheMergeOp::PrepareNodePostAction() { // Run any common code from super class first before adding our own + // specific logic + CHECK_FAIL_RETURN_UNEXPECTED(child_.size() == 2, "Incorrect number of children"); + RETURN_IF_NOT_OK(ParallelOp::PrepareNodePostAction()); + // Get the computed check sum from all ops in the cache miss class + uint32_t cache_crc = DatasetOp::GenerateCRC(child_[kCacheMissChildIdx]); + // This is a mappable cache op so the id's need to be generated. + // Construct the cache + const bool generate_ids = false; + Status rc = cache_client_->CreateCache(cache_crc, generate_ids); + if (rc.get_code() == StatusCode::kDuplicateKey) { + // We are told the cache has been created already. + MS_LOG(INFO) << "Cache created already"; + rc = Status::OK(); + } + RETURN_IF_NOT_OK(rc); + return Status::OK(); +} +Status CacheMergeOp::ComputeColMap() { + CHECK_FAIL_RETURN_UNEXPECTED(child_[kCacheMissChildIdx] != nullptr, "Cache miss stream empty"); + if (column_name_id_map().empty()) { + column_name_id_map_ = child_[kCacheMissChildIdx]->column_name_id_map(); + } + CHECK_FAIL_RETURN_UNEXPECTED(!column_name_id_map().empty(), "No column map detected"); + return Status::OK(); +} +Status CacheMergeOp::TensorRowRequest::Wait(TensorRow *out) { + RETURN_UNEXPECTED_IF_NULL(out); + // Block until the missing row is in the pool. 
+ RETURN_IF_NOT_OK(use_count_.P()); + std::unique_lock lck(dq_mux_); + CHECK_FAIL_RETURN_UNEXPECTED(!row_.empty(), "Programming error"); + *out = std::move(row_.front()); + row_.pop_front(); + return Status::OK(); +} +void CacheMergeOp::TensorRowRequest::WakeUpAny(TensorRow &&row) { + std::unique_lock lck(dq_mux_); + // Technically number of this row shows up in the cache miss stream is equal to the number + // of P() call. However the cleaner wants it too. So we need an extra copy. + if (GetState() == State::kEmpty) { + // We will do a deep copy + for (auto &ts : row) { + auto out_ts = std::make_shared(ts->shape(), ts->type(), ts->GetBuffer(), ts->SizeInBytes()); + cleaner_copy_.push_back(out_ts); + } + cleaner_copy_.setId(row.getId()); + // Change the state to dirty + SetState(State::kDirty); + } + row_.push_back(std::move(row)); + // Bump up the use count by 1. This wake up any parallel worker which is waiting + // for this row. + use_count_.V(); +} +Status CacheMergeOp::TensorRowRequest::Release(TensorRow *out) { + RETURN_UNEXPECTED_IF_NULL(out); + // We are not holding any mutex here because the cleaner isn't really touching the deque row_. + // In case we have multiple cleaners and they all see the copy, only one of them will + // get it. + auto expected = State::kDirty; + if (st_.compare_exchange_strong(expected, State::kClean)) { + *out = std::move(cleaner_copy_); + } + return Status::OK(); +} +// Builder constructor. Creates the builder object. +CacheMergeOp::Builder::Builder() : build_cache_client_(nullptr), build_sampler_(nullptr) { + std::shared_ptr cfg = GlobalContext::config_manager(); + build_num_workers_ = cfg->num_parallel_workers(); + build_op_connector_size_ = cfg->op_connector_size(); + build_num_cleaners_ = 1; +} + +// Check if the required parameters are set by the builder. 
+Status CacheMergeOp::Builder::SanityCheck() const { + if (build_cache_client_ == nullptr) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "CacheMergeOp requires a CacheClient"); + } + // Make sure the cache client has a valid session + if (!build_cache_client_->session_id()) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, + "Cache client for CacheMergeOp is missing session id"); + } + return Status::OK(); +} + +// The builder "build" method creates the final object and does some init on it +Status CacheMergeOp::Builder::Build(std::shared_ptr *ptr) { + RETURN_IF_NOT_OK(SanityCheck()); + *ptr = std::make_shared(build_num_workers_, build_op_connector_size_, build_num_cleaners_, + build_cache_client_, build_sampler_); + return Status::OK(); +} + +// Pre-Visitor accept method for NodePass +Status CacheMergeOp::PreAccept(NodePass *p, bool *modified) { + // Downcast shared pointer then call the pre-visitation + return p->PreRunOnNode(shared_from_base(), modified); +} + +// Visitor accept method for NodePass +Status CacheMergeOp::Accept(NodePass *p, bool *modified) { + // Downcast shared pointer then call visitor + return p->RunOnNode(shared_from_base(), modified); +} + +Status CacheMergeOp::EoeReceived(int32_t worker_id) { + // If we are in a repeat path, send the eoe up. + // Otherwise ignore it. 
+ if (BitTest(op_ctrl_flags_, kDeOpRepeated)) { + return DatasetOp::EoeReceived(worker_id); + } + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_merge_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_merge_op.h new file mode 100644 index 00000000000..df37465fc44 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_merge_op.h @@ -0,0 +1,196 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_ENGINE_DATASETOPS_CACHE_MERGE_OP_H_ +#define DATASET_ENGINE_DATASETOPS_CACHE_MERGE_OP_H_ + +#include +#include +#include +#include +#include +#include +#include +#include "minddata/dataset/core/tensor_row.h" +#include "minddata/dataset/engine/cache/cache_client.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/semaphore.h" + +namespace mindspore { +namespace dataset { +/// \brief Provides method to merge two streams (one from CacheLookup and one from cache miss stream) into one single +/// stream +class CacheMergeOp : public ParallelOp { + public: + // Some handshake structures among the main thread, cleaner threads and parallel op threads. 
+ class TensorRowRequest { + public: + enum class State : uint8_t { + kEmpty = 0, // No row in the deque + kDirty = 1, // Cleaner hasn't flushed it to the cache server yet. + kClean = 2 // The row has been flushed already. + }; + explicit TensorRowRequest(row_id_type id) : st_(State::kEmpty), use_count_(0) {} + ~TensorRowRequest() = default; + State GetState() const { return st_; } + void SetState(State newState) { st_ = newState; } + Status Wait(TensorRow *out); + void WakeUpAny(TensorRow &&row); + Status Release(TensorRow *out); + + private: + std::mutex dq_mux_; + std::atomic st_; + Semaphore use_count_; + std::deque row_; + TensorRow cleaner_copy_; + }; + + constexpr static int kCacheHitChildIdx = 0; // Cache hit stream + constexpr static int kCacheMissChildIdx = 1; // Cache miss stream + + /// \brief The nested builder class inside of the CacheMergeOp is used to help manage all of + /// the arguments for constructing it. Use the builder by setting each argument + /// with the provided set methods, and then finally call the build method to execute + /// the actual construction. + class Builder { + public: + /// Builder constructor. Creates the builder object. + /// \note No default args + Builder(); + + /// Default destructor + ~Builder() = default; + + /// Setter method. + /// \return Builder setter method returns reference to the builder. + Builder &SetNumWorkers(int32_t num_workers) { + build_num_workers_ = num_workers; + return *this; + } + + /// Setter method. + /// \return Builder setter method returns reference to the builder. + Builder &SetOpConnectorSize(int32_t connector_size) { + build_op_connector_size_ = connector_size; + return *this; + } + + /// Setter method. + /// \return Builder setter method returns reference to the builder. 
+ Builder &SetClient(std::shared_ptr cache_client) { + build_cache_client_ = cache_client; + return *this; + } + + /// \brief Setter method + /// \param sampler + /// \return Builder setter method returns reference to the builder. + Builder &SetSampler(std::shared_ptr sampler) { + build_sampler_ = std::move(sampler); + return *this; + } + + /// \brief Setter method + /// \param num_cleaners + /// \return Builder setter method returns reference to the builder. + Builder &SetNumCleaner(int32_t num_cleaners) { + build_num_cleaners_ = num_cleaners; + return *this; + } + + /// The builder "build" method creates the final object and does some init on it. + /// \param ptr The shared_ptr to the new CacheMergeOp object + /// \return Status + Status Build(std::shared_ptr *ptr); + + private: + int32_t build_num_workers_; + int32_t build_op_connector_size_; + int32_t build_num_cleaners_; + std::shared_ptr build_cache_client_; + std::shared_ptr build_sampler_; + + /// Check if the required parameters are set by the builder. 
+ /// \return Status The error code return + Status SanityCheck() const; + }; + + /// \brief Constructor + /// \param numWorkers Number of parallel workers as a derived class of ParallelOp + /// \param opConnector Size Connector size as a derived class of ParallelOp + /// \param numCleaners Number of cleaners to move cache miss rows into the cache server + /// \param cache_client CacheClient to commmunicate with the Cache server + /// \param sampler as a derived class of ParallelOp + CacheMergeOp(int32_t numWorkers, int32_t opConnectorSize, int32_t numCleaners, + std::shared_ptr cache_client, const std::shared_ptr &sampler); + ~CacheMergeOp(); + void Print(std::ostream &out, bool show_all) const override; + friend std::ostream &operator<<(std::ostream &out, const CacheMergeOp &mo) { + mo.Print(out, false); + return out; + } + /// \brief Master thread responsible to spawn all the necessary worker threads for the two streams and + /// the threads for the cleaners. + /// \return + Status operator()() override; + /// \brief Entry function for worker thread that fetch rows from CacheLookupOp + /// \param workerId + /// \return Status object + Status WorkerEntry(int32_t workerId) override; + Status PrepareNodePostAction() override; + /// \brief Entry function for worker thread that fetch rows from the cache miss stream + /// \param workerId + /// \return Status object + Status CacheMissWorkerEntry(int32_t workerId); + Status GetRq(row_id_type row_id, TensorRowRequest **); + + /// \brief Base-class override for NodePass pre-visit acceptor + /// \param[in] p The node to visit + /// \param[out] modified Indicator if the node was modified + /// \return Status of the node visit + Status PreAccept(NodePass *p, bool *modified) override; + + /// \brief Base-class override for NodePass visitor acceptor + /// \param[in] p The node to visit + /// \param[out] modified Indicator if the node was modified + /// \return Status of the node visit + Status Accept(NodePass *p, bool 
*modified) override; + + /// \brief Base-class override for eoe handling + /// \param worker_id + /// \return Status object + Status EoeReceived(int32_t worker_id) override; + + protected: + Status ComputeColMap() override; + + private: + std::mutex mux_; + std::map>> cache_miss_map_; + std::unique_ptr> io_que_; + std::shared_ptr cache_client_; + int32_t num_cleaners_; + + /// \brief These are the entry functions for the cleaner threads. Each cleaner is responsible for + /// moving cache miss TensorRow into the CacheServer. + /// \return Status object + Status Cleaner(); +}; +} // namespace dataset +} // namespace mindspore +#endif // DATASET_ENGINE_DATASETOPS_CACHE_MERGE_OP_H_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.cc new file mode 100644 index 00000000000..143c45b2dcc --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.cc @@ -0,0 +1,219 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "minddata/dataset/engine/datasetops/cache_op.h" + +#include +#include +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/datasetops/repeat_op.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/util/task_manager.h" +#include "utils/log_adapter.h" + +namespace mindspore { +namespace dataset { +// Builder constructor. Creates the builder object. +CacheOp::Builder::Builder() : build_cache_client_(nullptr), build_sampler_(nullptr) { + std::shared_ptr cfg = GlobalContext::config_manager(); + build_num_workers_ = cfg->num_parallel_workers(); + rows_per_buffer_ = cfg->rows_per_buffer(); + build_op_connector_size_ = cfg->op_connector_size(); +} + +// Check if the required parameters are set by the builder. +Status CacheOp::Builder::SanityCheck() const { + if (build_cache_client_ == nullptr) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "CacheOp requires a CacheClient"); + } + // Make sure the cache client has a valid session + if (!build_cache_client_->session_id()) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Cache client for CacheOp is missing session id"); + } + return Status::OK(); +} + +// The builder "build" method creates the final object and does some init on it +Status CacheOp::Builder::Build(std::shared_ptr *ptr) { + RETURN_IF_NOT_OK(SanityCheck()); + *ptr = std::make_shared(build_num_workers_, build_op_connector_size_, rows_per_buffer_, build_cache_client_, + build_sampler_); + RETURN_IF_NOT_OK((*ptr)->InitCache()); + + return Status::OK(); +} + +// Constructor of CacheOp +CacheOp::CacheOp(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf, + std::shared_ptr cache_client, std::shared_ptr sampler) + : CacheBase(num_workers, 
op_connector_size, rows_per_buf, cache_client, sampler), + num_guys_in_(0), + phase_(Phase::kBuildPhase) {} + +// Destructor +CacheOp::~CacheOp() = default; + +// Private function for cache setup/init work just after construction +Status CacheOp::InitCache() { return Status::OK(); } + +// This class functor will provide the master loop that drives the logic for performing the work +Status CacheOp::operator()() { + if (!sampler_) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, + "CacheOp requires a sampler before it can be executed!"); + } + RETURN_IF_NOT_OK(RegisterResources()); + // Kick off the workers + RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&CacheOp::WorkerEntry, this, std::placeholders::_1))); + // required task group sync after launching workers + TaskManager::FindMe()->Post(); + // Wait for the workers to finish caching the rows. + RETURN_IF_NOT_OK(WaitForCachingAllRows()); + RETURN_IF_NOT_OK(FetchSamplesToWorkers()); + return Status::OK(); +} +Status CacheOp::CacheAllRows(int32_t worker_id) { + // If the current phase is to fill the cache, do it then. + if (phase_ == Phase::kBuildPhase) { + // We will take the chance to cache the schema at the server. + // Just do it once and pick one worker to do it. + if (worker_id == 0) { + RETURN_IF_NOT_OK(cache_client_->CacheSchema(column_name_id_map())); + } + MS_LOG(INFO) << "CacheOp first epoch SAVE mode started. Worker: " << worker_id; + // SAVE mode loop + std::unique_ptr db_ptr; + RETURN_IF_NOT_OK(this->GetNextInput(&db_ptr, worker_id, 0)); + while (!db_ptr->eof()) { + if (!db_ptr->eoe()) { + RETURN_IF_NOT_OK(cache_client_->WriteBuffer(std::move(db_ptr))); + } else { + // In a repeat-over-cache scenario, any of the "real" leaf operators below us have been set up + // as non-repeating leaf ops. As such, they only do one epoch and then quit. Since we got the + // the eoe to indicate the end of the epoch, we should next expect to get the eof. 
+ // Drain this eof so that we don't leave it sitting there on a connector that we'll never fetch + // from again. + RETURN_IF_NOT_OK(this->GetNextInput(&db_ptr, worker_id, 0)); + if (!db_ptr->eof()) { + RETURN_STATUS_UNEXPECTED("Cache op expects to get an eof after eoe from child."); + } + } + RETURN_IF_NOT_OK(this->GetNextInput(&db_ptr, worker_id, 0)); + } + } + // Let the main guy know we are done. + auto last_guy_in = num_guys_in_.fetch_add(1); + if ((last_guy_in + 1) == num_workers_) { + rows_cache_done_.Set(); + } else { + // Let's do a sync up here. + RETURN_IF_NOT_OK(rows_cache_done_.Wait()); + } + return Status::OK(); +} +Status CacheOp::WaitForCachingAllRows() { + // Wait for the workers to finish caching the rows. + RETURN_IF_NOT_OK(rows_cache_done_.Wait()); + // Move from build phase to fetch phase if we are the one to fill the cache + if (phase_ == Phase::kBuildPhase) { + RETURN_IF_NOT_OK(cache_client_->BuildPhaseDone()); + // Move to the next phase + phase_ = Phase::kFetchPhase; + } + // Get statistics from the server, and if we are not the one to create the cache, + // wait until the state changed from build phase to fetch base. + CacheClient::ServiceStat stat{}; + bool BuildPhaseDone = true; + do { + RETURN_IF_NOT_OK(cache_client_->GetStat(&stat)); + BuildPhaseDone = stat.cache_service_state == static_cast(CacheService::State::kFetchPhase); + if (!BuildPhaseDone) { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + } while (!BuildPhaseDone); + const row_id_type min_key = stat.min_row_id; + const row_id_type max_key = stat.max_row_id; + num_rows_ = max_key - min_key + 1; + MS_LOG(INFO) << "Number of rows cached: " << num_rows_; + MS_LOG(INFO) << "Number of rows cached in memory : " << stat.num_mem_cached; + MS_LOG(INFO) << "Number of rows spilled to disk : " << stat.num_disk_cached; + // Now all rows are cached and we have done a sync point check up. Next phase is + // is pick up fetch input from sampler and pass up to the caller. 
+ RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(this)); + return Status::OK(); +} +Status CacheOp::WorkerEntry(int32_t worker_id) { + TaskManager::FindMe()->Post(); + RETURN_IF_NOT_OK(CacheAllRows(worker_id)); + RETURN_IF_NOT_OK(FetchFromCache(worker_id)); + return Status::OK(); +} +Status CacheOp::RegisterResources() { + RETURN_IF_NOT_OK(CacheBase::RegisterResources()); + RETURN_IF_NOT_OK(rows_cache_done_.Register(tree_->AllTasks())); + RETURN_IF_NOT_OK(keys_miss_.Register(tree_->AllTasks())); + return Status::OK(); +} + +// Base-class override for setting specific CacheOp configurations. This code will be called +// during the execution tree prepare phase BEFORE traversing down to child operators. +uint32_t CacheOp::PrepareFlags() const { return ExecutionTree::kDePrepCache; } +// Base-class override for special eoe handler. +// CacheOp must override this because it shall not perform default handling of eoe. Instead +// the CacheOp manages actions related to the end of the epoch. +Status CacheOp::EoeReceived(int32_t worker_id) { + state_ = OpState::kDeOpIdle; + return Status::OK(); +} +// Base-class override for handling cases when an eof is received. +Status CacheOp::EofReceived(int32_t worker_id) { + // eofReceived is overloaded because we want to manually handle this eof. + // Specifically, the default behaviour is to pack it and flow it up to the next connection. + // In this case, we want a no-op behaviour so that we can perform correct action. 
+ return Status::OK(); +} + +// Pre-Visitor accept method for NodePass +Status CacheOp::PreAccept(NodePass *p, bool *modified) { + // Downcast shared pointer then call the pre-visitation + return p->PreRunOnNode(shared_from_base(), modified); +} + +// Visitor accept method for NodePass +Status CacheOp::Accept(NodePass *p, bool *modified) { + // Downcast shared pointer then call visitor + return p->RunOnNode(shared_from_base(), modified); +} + +// A public wrapper for creating the cache through the client +Status CacheOp::CreateCache(uint32_t cache_crc) { + // This is a non-mappable cache op so the id's need to be generated. + // Construct the cache + const bool generate_ids = true; + Status rc = cache_client_->CreateCache(cache_crc, generate_ids); + if (rc.get_code() == StatusCode::kDuplicateKey) { + // We are told the cache has been created already. So we skip the build phase. + phase_ = Phase::kFetchPhase; + rc = Status::OK(); + } + RETURN_IF_NOT_OK(rc); + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.h new file mode 100644 index 00000000000..dd34d549736 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.h @@ -0,0 +1,168 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef DATASET_ENGINE_DATASETOPS_CACHE_OP_H_ +#define DATASET_ENGINE_DATASETOPS_CACHE_OP_H_ + +#include +#include +#include +#include +#include "minddata/dataset/engine/datasetops/cache_base_op.h" + +namespace mindspore { +namespace dataset { +/// \brief CacheOp provides a memory/disk cache that acts as a save-point within a non-mappable dataset. +/// \note For mappable dataset, please see CacheLookupOp. +/// \see CacheLookupOp +class CacheOp : public CacheBase, public RandomAccessOp { + public: + // This CacheOp is for non-mappable case where it is divided into two phases. + // The first phase is we cache all the rows from the child (and let the cache server + // assigns row id). No read access in the first phase. Once the cache is fully built, + // we switch to second phase and fetch requests from the sampler. + enum class Phase : uint8_t { kBuildPhase = 0, kFetchPhase = 1 }; + + /// \brief The nested builder class inside of the CacheOp is used to help manage all of + /// the arguments for constructing it. Use the builder by setting each argument + /// with the provided set methods, and then finally call the build method to execute + /// the actual construction. + class Builder { + public: + // Builder constructor. Creates the builder object. + // @note No default args + // @return This is a constructor. + Builder(); + + // Default destructor + ~Builder() = default; + + /// \brief Setter method. + /// \return Builder setter method returns reference to the builder. + Builder &SetNumWorkers(int32_t num_workers) { + build_num_workers_ = num_workers; + return *this; + } + + /// \brief Setter method. + /// \return Builder setter method returns reference to the builder. + Builder &SetOpConnectorSize(int32_t connector_size) { + build_op_connector_size_ = connector_size; + return *this; + } + + /// Setter method. + /// \return Builder setter method returns reference to the builder. 
+ Builder &SetClient(std::shared_ptr cache_client) { + build_cache_client_ = cache_client; + return *this; + } + + /// \brief Setter method + /// \param rows_per_buffer + /// \return Builder setter method returns reference to the builder. + Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { + rows_per_buffer_ = rows_per_buffer; + return *this; + } + + /// \brief Setter method + /// \param sampler + /// \return Builder setter method returns reference to the builder. + Builder &SetSampler(std::shared_ptr sampler) { + build_sampler_ = std::move(sampler); + return *this; + } + + /// \brief The builder "build" method creates the final object and does some init on it. + /// \param ptr The shared_ptr to the new CacheOp object + /// \return Status + Status Build(std::shared_ptr *ptr); + + private: + int32_t build_num_workers_; + int32_t rows_per_buffer_; + int32_t build_op_connector_size_; + std::shared_ptr build_cache_client_; + std::shared_ptr build_sampler_; + + /// \brief Check if the required parameters are set by the builder. + /// \return Status The error code return + Status SanityCheck() const; + }; + + /// \brief Constructor of CacheOp + /// \note The builder class should be used to call it. + /// \param num_workers The number of worker threads. + /// \param op_connector_size The size of each queue in the connector. + CacheOp(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf, + std::shared_ptr cache_client, std::shared_ptr sampler); + + // Destructor + ~CacheOp(); + + /// \brief Base-class override for setting specific CacheOp configurations. This code will be called + /// during the execution tree prepare phase BEFORE traversing down to child operators. + uint32_t PrepareFlags() const override; + /// \brief Base-class override for special eoe handler. + /// CacheOp must override this because it shall not perform default handling of eoe. Instead + /// the CacheOp manages actions related to the end of the epoch. 
+ /// \return Status - The error code return + Status EoeReceived(int32_t worker_id) override; + /// \brief Base-class override for NodePass pre-visit acceptor + /// \param[in] p The node to visit + /// \param[out] modified Indicator if the node was modified + /// \return Status of the node visit + Status PreAccept(NodePass *p, bool *modified) override; + /// \brief Base-class override for NodePass visitor acceptor + /// \param[in] p The node to visit + /// \param[out] modified Indicator if the node was modified + /// \return Status of the node visit + Status Accept(NodePass *p, bool *modified) override; + /// \brief Base-class override for handling cases when an eof is received. + /// \param worker_id - The worker id + /// \return Status - The error code return + Status EofReceived(int32_t worker_id) override; + Status operator()() override; + Status WorkerEntry(int32_t worker_id) override; + /// \brief Base-class override for handling cases if we allow cache miss + bool AllowCacheMiss() override { return false; } + /// \brief Base-class override for the name of this operator + std::string Name() const override { return "CacheOp"; } + /// \brief A public wrapper for creating the cache through the client + /// \param[in] cache_crc The crc that identifies the cache + /// \see cache_pass.cc + /// \return Status return code + Status CreateCache(uint32_t cache_crc); + + private: + WaitPost rows_cache_done_; + std::atomic num_guys_in_; + Phase phase_; + /// \brief The main thread will wait until all the rows are cached and will start the handshake with the sampler. + /// \return Status object + Status WaitForCachingAllRows(); + /// \brief For non-mappable dataset, there is a build phase where we cache all the rows. 
+ /// \return Status object + Status CacheAllRows(int32_t worker_id); + Status RegisterResources() override; + /// \brief Private function for cache setup/init work just after construction + /// \return Status The error code return + Status InitCache(); +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_ENGINE_DATASETOPS_CACHE_OP_H_ diff --git a/mindspore/ccsrc/dataset/engine/datasetops/concat_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/concat_op.cc similarity index 78% rename from mindspore/ccsrc/dataset/engine/datasetops/concat_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/concat_op.cc index 4bada31e7e7..7acb68350b0 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/concat_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/concat_op.cc @@ -17,11 +17,11 @@ #include #include "common/utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/datasetops/concat_op.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/datasetops/concat_op.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" namespace mindspore { namespace dataset { @@ -61,46 +61,39 @@ void ConcatOp::Print(std::ostream &out, bool show_all) const { Status ConcatOp::operator()() { // The children_num_ parameter needs to be put here children_num_ = static_cast(child_.size()); - TaskManager::FindMe()->Post(); std::unique_ptr buf; - RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf)); - int eof_count = 0; - while (eof_count != children_num_) { + while (eof_count == 0) { for (int i = 0; i < children_num_; i++) { - // 1. Throw the eof buffer when meet it - if (buf->eof() || buf->eoe()) { - RETURN_IF_NOT_OK(child_[i]->GetNextBuffer(&buf)); + // 1. 
Read the first buffer + RETURN_IF_NOT_OK(child_[i]->GetNextBuffer(&buf)); + if (buf->eof()) { + eof_count++; + continue; } // 2. Do verification as for column name, column data type and rank of column data - RETURN_IF_NOT_OK(Verify(i, buf)); - + if (!buf->eoe()) { + RETURN_IF_NOT_OK(Verify(i, buf)); + } // 3. Put the data into output_connector while (!buf->eoe() && !buf->eof()) { RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(buf))); RETURN_IF_NOT_OK(child_[i]->GetNextBuffer(&buf)); } - - // 4. Throw the eoe buffer when meet it - if (buf->eoe() && (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat))) { - RETURN_IF_NOT_OK(child_[i]->GetNextBuffer(&buf)); - } - // 5. Add eoe buffer after get buffer from all child - if (i == (children_num_ - 1)) { - auto eoe_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); - RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoe_buffer))); - } - if (buf->eof()) { - eof_count++; - } + } + // 4. Add eoe buffer after get buffer from all child + if (eof_count == 0) { + auto eoe_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOE); + RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoe_buffer))); } } - // 6. Add eof buffer in the end manually + CHECK_FAIL_RETURN_UNEXPECTED(eof_count == children_num_, + "Something went wrong, eof count does not match the number of children."); + // 5. 
Add eof buffer in the end manually MS_LOG(DEBUG) << "Add the eof buffer manualy in the end."; auto eof_buffer = std::make_unique(0, DataBuffer::kDeBFlagEOF); RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eof_buffer))); - return Status::OK(); } @@ -126,12 +119,6 @@ Status ConcatOp::Verify(int32_t id, const std::unique_ptr &buf) { return Status::OK(); } -Status ConcatOp::PrepareNodePostAction() { - RETURN_IF_NOT_OK(PipelineOp::PrepareNodePostAction()); - tree_->AddToEOEOpStack(shared_from_this()); - return Status::OK(); -} - // We need to overwrite the super class ComputeColMap here because the number of children is more than 1. Status ConcatOp::ComputeColMap() { if (column_name_id_map_.empty()) { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/concat_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/concat_op.h similarity index 90% rename from mindspore/ccsrc/dataset/engine/datasetops/concat_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/concat_op.h index 4bcfdbf6c60..3d3d9df71c4 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/concat_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/concat_op.h @@ -20,7 +20,7 @@ #include #include #include -#include "dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" namespace mindspore { namespace dataset { @@ -75,12 +75,6 @@ class ConcatOp : public PipelineOp { // @return Status - The error code return Status operator()() override; - // During tree prepare phase, operators may have specific post-operations to perform depending on - // their role. - // @notes Derived versions of this function should always call it's superclass version first - // before providing their own implementations. 
- Status PrepareNodePostAction() override; - // Op name getter // @return Name of the current Op std::string Name() const override { return "ConcatOp"; } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc similarity index 85% rename from mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc index 3e31f6c0177..9254141308f 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" #include #include @@ -23,12 +23,12 @@ #include #include -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/datasetops/device_queue_op.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/datasetops/device_queue_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/opt/pass.h" #include "utils/system/crc32c.h" #include "utils/log_adapter.h" @@ -153,16 +153,38 @@ Status DatasetOp::Remove() { } } + // Finally, clear "this" op's parent and child pointers since we have just + // disconnected it from the tree and invalidate it's fields. 
+ child_.clear(); + parent_.clear(); + operator_id_ = kInvalidOperatorId; + tree_ = nullptr; + return Status::OK(); } -// Getter function to get a shared pointer to our childAdds a operator to become our child. +// Getter function to get a shared pointer to our child std::shared_ptr DatasetOp::child(int32_t child_index) const { + std::shared_ptr return_op = nullptr; + if (child_.empty()) { + return return_op; + } MS_ASSERT(child_index < static_cast(child_.size())); // Return a shared pointer return child_[child_index]; } +// Getter function to get the parent pointer +void DatasetOp::Parent(DatasetOp **parent, int32_t parent_index) const { + if (parent_.empty()) { + // common case if this is a root node + *parent = nullptr; + } else { + MS_ASSERT(parent_index < static_cast(parent_.size())); + *parent = parent_[parent_index]; + } +} + // Creates the connector within this operator void DatasetOp::CreateConnector(int32_t num_producers, int32_t num_consumers) { MS_LOG(DEBUG) << "Creating connector in tree operator: " << operator_id_ << ". Producer: " << num_producers @@ -264,19 +286,11 @@ Status DatasetOp::EofReceived(int32_t worker_id) { // During tree prepare phase, operators may have specific pre-operations to perform depending on // their role. -Status DatasetOp::PrepareNodePreAction() { - if (BitTest(tree_->PrepareFlags(), ExecutionTree::kDePrepRepeat)) set_control_flag(kDeOpRepeated); - return Status::OK(); -} +Status DatasetOp::PrepareNodePreAction() { return Status::OK(); } + // During tree prepare phase, operators may have specific post-operations to perform depending on // their role. Status DatasetOp::PrepareNodePostAction() { - // If this op does not have any children and it is in a repeat path of the tree... - if (child_.empty() && BitTest(op_ctrl_flags_, kDeOpRepeated)) { - // push ourselves onto the eoe operator stack. Later, a repeat/epoch ctrl operator - // above us will consume them. 
- tree_->AddToEOEOpStack(shared_from_this()); - } // Creating Connector object for each op. // The consumer of the root node is assumed to be one thread. // If multiple threads are consuming from the root node, they will get the ordered data in round robin fashion. @@ -346,34 +360,13 @@ Status DatasetOp::Accept(NodePass *p, bool *modified) { return p->RunOnNode(shared_from_this(), modified); } -// A helper function with some common code that leaf nodes can use during -// prepare phase for checking if they need to assign a sampler to the cache. -Status DatasetOp::SaveSamplerForCache(bool random_access_op) { - // If we are a descendant under a cache op and we have a sampler, then save this sampler - // to a stack so that the cache can pick it up during it's processing above us. - if (sampler_) { - if (BitTest(tree_->PrepareFlags(), ExecutionTree::kDePrepCache)) { - // use move semantic to set our sampler_ to null after the move. This is okay because a sampler is - // useless to a random data op. It was only being used as a temporary holding until the cache can - // be created - tree_->AddToSamplerStack(sampler_); - MS_LOG(INFO) << "Preparing a leaf op: passing sampler up the tree for Cache handling."; - } else if (!random_access_op) { - // A sampler exists, but we are not in a caching tree and we are not a random access mappable leaf. - // This is an error because that type of leaf does not use sampling unless there's a cache to hook it into. - RETURN_STATUS_UNEXPECTED( - "Non-mappable leaf op has a sampler, but it only supports sampling if there is a cache after it in the tree"); - } - } - - if (!random_access_op) { - // Since we don't truly need the sampler for this non-mappable dataset and it's been saved for the cache - // we can remove it now from the base. 
- sampler_.reset(); - } - +// Getter for the sampler, and it also removes the sampler from the op +Status DatasetOp::FetchRemoveSampler(std::shared_ptr *sampler) { + *sampler = sampler_; // It's okay if it sampler_ points to nullptr + sampler_.reset(); // clear our member-copy of this pointer. We no longer have this sampler return Status::OK(); } + uint32_t DatasetOp::GenerateCRC(const std::shared_ptr &op) { std::stringstream ss; op->tree_->Print(ss, op); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.h similarity index 89% rename from mindspore/ccsrc/dataset/engine/datasetops/dataset_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.h index ab5cb903575..b4630c16525 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.h @@ -21,12 +21,13 @@ #include #include #include -#include "dataset/core/constants.h" -#include "dataset/engine/db_connector.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { + // Forward declare class ExecutionTree; @@ -45,10 +46,10 @@ class DatasetOp : public std::enable_shared_from_this { public: static constexpr int32_t kInvalidOperatorId = -1; - // Flags that control operator runtime behaviours + // Operator control flags enum OpControlFlags { kDeOpNone = 0, - kDeOpRepeated = 1, // Operator is a leaf node in a repeat path + kDeOpRepeated = 1, // Operator is a node in a repeat path kDeOpLastRepeat = 1 << 1 // We are in the last repeat loop }; @@ -71,17 +72,23 @@ class DatasetOp : public std::enable_shared_from_this { /// \param child - shared pointer to the child to remove. 
Status RemoveChild(std::shared_ptr child); - /// \brief Removes this node from the tree and connects it's parent/child together. + /// \brief Removes this node from the tree and connects it's parent/child together /// \return Status eerror code returned Status Remove(); /// \brief Getter function to get a shared pointer to our child - /// \param child_index - An operator can have n children. Indicates choose which child to return. + /// \param[in] child_index An operator can have n children. Indicates which child to return. + /// \return The shared pointer to the child. If there are no children, it returns null regardless of the given index std::shared_ptr child(int32_t child_index) const; - /// \brief Inserts a operator as the parent current op. - /// Inserted op will become the sole parent of the current op. - /// The existing parent of the current op will be transferred to the inserted op. + /// \brief Getter function to get the pointer to our parent + /// If there are no parents, it returns null regardless of the given index + /// \param[in] parent_index An operator can have n parents. Indicates which parent to return. + void Parent(DatasetOp **parent, int32_t parent_index) const; + + // Inserts a operator as the parent current op. + // Inserted op will become the sole parent of the current op. + // The existing parent of the current op will be transferred to the inserted op. 
Status InsertAsParent(std::shared_ptr to_add); /// \brief Creates the connector within this operator @@ -161,16 +168,6 @@ class DatasetOp : public std::enable_shared_from_this { /// \return Status - The error code return virtual Status Reset(); - /// \brief This calls the reset function on this subtree in pre-order - /// \return Status - The error code return - virtual Status ResetSubtree() { - RETURN_IF_NOT_OK(Reset()); - for (const auto &c : child_) { - RETURN_IF_NOT_OK(c->ResetSubtree()); - } - return Status::OK(); - } - /// \brief During tree prepare phase, operators may have specific pre-operations to perform depending on /// their role. /// \notes Derived versions of this function should always call it's superclass version first @@ -296,7 +293,12 @@ class DatasetOp : public std::enable_shared_from_this { /// \return Shared pointer to the sampler (may return nullptr) std::shared_ptr sampler() { return sampler_; } - /// Computes a CRC value for the operator + /// \brief Getter for the sampler, and it also removes the sampler from the op + /// \param[out] sampler A pointer to the output sampler that was removed + /// \return Status error code + Status FetchRemoveSampler(std::shared_ptr *sampler); + + // Computes a CRC value for the operator static uint32_t GenerateCRC(const std::shared_ptr &op); /// \brief A helper templated function for casting "this" pointer to shared_ptr @@ -307,17 +309,24 @@ class DatasetOp : public std::enable_shared_from_this { return std::static_pointer_cast(shared_from_this()); } - protected: - /// Adds a parent operator to this operator - /// \notes External callers do not have access to this function. - /// \param parent - The parent node to add - void AddParent(DatasetOp *parent); + /// \brief Setter for the sampler. Allows you to overwrite a previous sampler with a new one. 
+ void SetSampler(std::shared_ptr sampler) { sampler_ = sampler; } - /// Removes a parent operator from this operator - /// \notes External callers do not have access to this function. - /// \param parent - The parent node to remove + /// \brief Checks if this is a leaf node (0 children) + /// \return boolean returns true if it's a leaf + bool IsLeaf() { return (child_.empty()); } + + protected: + /// \brief Removes a parent operator from this operator + /// \notes External callers do not have access to this function + /// \param[in] parent The parent node to remove void RemoveParent(const DatasetOp *parent); + /// \brief Adds a parent operator to this operator + /// \notes External callers do not have access to this function + /// \param[in] parent The parent node to add + void AddParent(DatasetOp *parent); + /// Compute the current op's column map using its child's column map. /// Get called during the tree post-prepare phase in PrepareNodePostAction. /// This base implementation just inherits the map from child 0, and can only be used if the number of children is 1. @@ -325,12 +334,6 @@ class DatasetOp : public std::enable_shared_from_this { /// \return - Status virtual Status ComputeColMap(); - /// A helper function with some common code that leaf nodes can use during - /// pre/pare phase for checking if they need to assign a sampler to the cache. - /// \param random_access_op - indicate if this is a mappable random access leaf or not - /// \return - Status - Status SaveSamplerForCache(bool random_access_op); - std::vector> child_; // Child nodes std::vector parent_; // Parent nodes. 
No ownership std::shared_ptr sampler_; // Some leaf ops might have a sampler diff --git a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc similarity index 95% rename from mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc index 0f1fefc0f08..4fe779246b5 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc @@ -17,16 +17,16 @@ #include #include #include -#include "dataset/core/config_manager.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/datasetops/device_queue_op.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/opt/pass.h" -#include "dataset/engine/perf/profiling.h" -#include "dataset/engine/perf/device_queue_tracing.h" -#include "dataset/util/status.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/datasetops/device_queue_op.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/engine/perf/profiling.h" +#include "minddata/dataset/engine/perf/device_queue_tracing.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.h index a8540045938..0fb4fb093d3 100644 --- 
a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.h @@ -20,15 +20,15 @@ #include #include -#include "dataset/engine/datasetops/pipeline_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/util/status.h" #ifdef ENABLE_TDTQUE -#include "dataset/engine/tdt/tdt_plugin.h" +#include "minddata/dataset/engine/tdt/tdt_plugin.h" #endif #ifdef ENABLE_GPUQUE -#include "device/gpu/gpu_buffer_mgr.h" +#include "runtime/device/gpu/gpu_buffer_mgr.h" using mindspore::device::BlockQueueStatus_T; using mindspore::device::GpuBufferMgr; #endif diff --git a/mindspore/ccsrc/dataset/engine/datasetops/filter_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.cc similarity index 95% rename from mindspore/ccsrc/dataset/engine/datasetops/filter_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.cc index 81c93c6e1cd..f32648a3dff 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/filter_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.cc @@ -13,24 +13,24 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/filter_op.h" +#include "minddata/dataset/engine/datasetops/filter_op.h" #include #include #include #include #include #include -#include "dataset/core/config_manager.h" -#include "dataset/core/constants.h" -#include "dataset/core/global_context.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/opt/pass.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/kernels/tensor_op.h" #include "utils/log_adapter.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/filter_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.h similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/filter_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.h index 36f70cb82f5..fcc6e577dff 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/filter_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.h @@ -21,9 +21,9 @@ #include #include #include -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/queue.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/queue.h" namespace mindspore { namespace dataset { diff --git 
a/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/map_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op.cc index 05a1ac79254..e5e70dbbdf2 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op.cc @@ -13,24 +13,24 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/map_op.h" +#include "minddata/dataset/engine/datasetops/map_op.h" #include #include #include #include #include -#include "dataset/core/config_manager.h" +#include "minddata/dataset/core/config_manager.h" -#include "dataset/core/constants.h" -#include "dataset/core/global_context.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/opt/pass.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/kernels/tensor_op.h" #include "utils/log_adapter.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/map_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/map_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op.h index 371d865196a..b1cd58010f1 100644 --- 
a/mindspore/ccsrc/dataset/engine/datasetops/map_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op.h @@ -21,9 +21,9 @@ #include #include #include -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/queue.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/queue.h" namespace mindspore { namespace dataset { @@ -181,6 +181,13 @@ class MapOp : public ParallelOp { // @return Name of the current Op std::string Name() const override { return "MapOp"; } + // List of tensor ops getter/setter + // @Return the vector of tensor ops by non-const reference + + auto &TFuncs() { return tfuncs_; } + + const auto &TFuncs() const { return tfuncs_; } + private: // Local queues where worker threads can pop from. // Popping directly from the Connector can block if the previous designated threads haven't pop. @@ -188,7 +195,7 @@ class MapOp : public ParallelOp { QueueList> local_queues_; // Static variables to be ready by worker threads, no modification and readonly - const std::vector> tfuncs_; + std::vector> tfuncs_; // Variable to store the column name that the tensorOps are consuming std::vector in_columns_; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/parallel_op.cc similarity index 89% rename from mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/parallel_op.cc index 244861a6c81..abb827aea85 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/parallel_op.cc @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" #include #include -#include "dataset/engine/datasetops/dataset_op.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/core/config_manager.h" -#include "dataset/engine/db_connector.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/parallel_op.h similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/parallel_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/parallel_op.h index f59d4bfc53b..da54ce13312 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/parallel_op.h @@ -18,9 +18,9 @@ #include #include -#include "dataset/core/constants.h" -#include "dataset/engine/datasetops/dataset_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/pipeline_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/pipeline_op.cc index 1d017a4d3e0..fff5ba19e7c 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/pipeline_op.cc @@ -13,7 +13,7 @@ * See the License for the 
specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" #include #include diff --git a/mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/pipeline_op.h similarity index 98% rename from mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/pipeline_op.h index cb3c76813b6..0538349f484 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/pipeline_op.h @@ -18,7 +18,7 @@ #include #include -#include "dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/project_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/project_op.cc similarity index 95% rename from mindspore/ccsrc/dataset/engine/datasetops/project_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/project_op.cc index 5ce40560244..e232a641648 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/project_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/project_op.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "dataset/engine/datasetops/project_op.h" +#include "minddata/dataset/engine/datasetops/project_op.h" #include #include #include @@ -22,10 +22,10 @@ #include #include #include -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/project_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/project_op.h similarity index 98% rename from mindspore/ccsrc/dataset/engine/datasetops/project_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/project_op.h index 628c1342baf..c2f14d34b73 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/project_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/project_op.h @@ -20,7 +20,7 @@ #include #include -#include "dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc similarity index 95% rename from mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc index 23cd29d2951..d12660e6f90 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc @@ -13,18 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/rename_op.h" +#include "minddata/dataset/engine/datasetops/rename_op.h" #include #include #include #include -#include "dataset/core/config_manager.h" -#include "dataset/core/constants.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/opt/pass.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/rename_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/rename_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.h index e209c075d61..d846bb1b409 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/rename_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.h @@ -20,9 +20,9 @@ #include #include #include -#include "dataset/core/tensor.h" -#include "dataset/engine/datasetops/pipeline_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/repeat_op.cc similarity index 82% rename from mindspore/ccsrc/dataset/engine/datasetops/repeat_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/repeat_op.cc index 4999dddd029..6d3dc91ed32 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.cc +++ 
b/mindspore/ccsrc/minddata/dataset/engine/datasetops/repeat_op.cc @@ -17,11 +17,11 @@ #include #include -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/datasetops/repeat_op.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/datasetops/repeat_op.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/opt/pass.h" #include "utils/log_adapter.h" @@ -77,26 +77,6 @@ void RepeatOp::Print(std::ostream &out, bool show_all) const { } } -// Base-class override for executing specific RepeatOp configurations. This code will be called -// during the execution tree prepare phase when it is visiting this operator. -Status RepeatOp::PrepareNodePostAction() { - // Run any common code from super class first before adding our own specific logic - RETURN_IF_NOT_OK(PipelineOp::PrepareNodePostAction()); - std::shared_ptr leaf_op = tree_->PopFromEOEOpStack(); - while (leaf_op != nullptr) { - // Track the leaf operators that are under this repeat op. - eoe_ops_.push_back(leaf_op); - leaf_op = tree_->PopFromEOEOpStack(); - } - // Push ourselves to the stack in case one of our ascendants is repeat too. - tree_->AddToEOEOpStack(shared_from_this()); - return Status::OK(); -} - -// Base-class override for setting specific RepeatOp configurations. This code will be called -// during the execution tree prepare phase BEFORE traversing down to child operators. -uint32_t RepeatOp::PrepareFlags() const { return ExecutionTree::kDePrepRepeat; } - // This function returns the buffer that is at the top of our output connector. The caller is // typically our parent node, when the parent is asking us to provide the next buffer of data. 
// Since RepeatOp is an inlined op, getting a buffer from us will simply bounce you to get @@ -130,7 +110,8 @@ Status RepeatOp::GetNextBuffer(std::unique_ptr *p_buffer, int32_t wo // Base-class override for handling cases when an eoe is received. Status RepeatOp::EoeReceived(int32_t worker_id) { repeat_count_++; - MS_LOG(DEBUG) << "Repeat operator end of epoch message received. Repeat count is now: " << repeat_count_ << "."; + MS_LOG(DEBUG) << "Repeat operator (" << operator_id_ + << ") end of epoch message received. Repeat count is now: " << repeat_count_ << "."; bool repeated = BitTest(op_ctrl_flags_, kDeOpRepeated); bool last_repeat = BitTest(op_ctrl_flags_, kDeOpLastRepeat); // If we've reached the requested repeat count, then flag the eoe nodes @@ -149,8 +130,12 @@ Status RepeatOp::EoeReceived(int32_t worker_id) { return Status::OK(); } - // base-class ResetSubtree - return (DatasetOp::ResetSubtree()); + // Invoke a reset against the eoe nodes only. + for (auto &eoe_op : eoe_ops_) { + RETURN_IF_NOT_OK(eoe_op->Reset()); + } + + return Status::OK(); } // Class functor operator () override. @@ -178,6 +163,18 @@ int32_t RepeatOp::num_consumers() const { } } +// Drive reset actions if needed +Status RepeatOp::Reset() { + // If there's nested repeats, an ascendant repeat may have ourself listed as an eoe op. + // In that case, we now have to bounce the reset down to our own eoe ops. + MS_LOG(DEBUG) << "Repeat operator (" << operator_id_ << ") reset."; + for (auto &eoe_op : eoe_ops_) { + RETURN_IF_NOT_OK(eoe_op->Reset()); + } + state_ = OpState::kDeOpRunning; + return Status::OK(); +} + int32_t RepeatOp::num_producers() const { if (child_.empty() || child_[0] == nullptr) { MS_LOG(DEBUG) << "Repeat operator, pointer to child node is null. 
Returning 0."; @@ -187,6 +184,12 @@ int32_t RepeatOp::num_producers() const { } } +// Pre-Visitor accept method for NodePass +Status RepeatOp::PreAccept(NodePass *p, bool *modified) { + // Downcast shared pointer then call the pre-visitation + return p->PreRunOnNode(shared_from_base(), modified); +} + // Visitor accept method for NodePass Status RepeatOp::Accept(NodePass *p, bool *modified) { // Downcast shared pointer then call visitor diff --git a/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/repeat_op.h similarity index 85% rename from mindspore/ccsrc/dataset/engine/datasetops/repeat_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/repeat_op.h index bba85c3bb5a..f5259de30e9 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/repeat_op.h @@ -18,8 +18,9 @@ #include #include +#include #include -#include "dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" namespace mindspore { namespace dataset { @@ -82,14 +83,6 @@ class RepeatOp : public PipelineOp { // @return Status - The error code return Status operator()() override; - // Base-class override for setting specific RepeatOp configurations. This code will be called - // during the execution tree prepare phase BEFORE traversing down to child operators. - uint32_t PrepareFlags() const override; - - // Base-class override for executing specific RepeatOp configurations. This code will be called - // during the execution tree post-prepare phase when it is visiting this operator. - Status PrepareNodePostAction() override; - // This function returns the buffer that is at the top of our output connector. The caller is // typically our parent node, when the parent is asking us to provide the next buffer of data. 
// Since RepeatOp is an inlined op, getting a buffer from us will simply bounce you to get @@ -110,6 +103,10 @@ class RepeatOp : public PipelineOp { // @param worker_id - The worker id Status EofReceived(int32_t worker_id) override; + /// \brief reset Op + /// \@return Status - The error code return + Status Reset() override; + // Base-class override. Return the number of workers in the first parent. // @param workerId - The worker id int32_t num_consumers() const override; @@ -118,16 +115,26 @@ class RepeatOp : public PipelineOp { // @param workerId - The worker id int32_t num_producers() const override; - // Base-class override for NodePass visitor acceptor. - // @param p - Pointer to the NodePass to be accepted. - // @param modified - Whether this node visit modified the pipeline. - // @return - Status of the node visit. + /// \brief Base-class override for NodePass pre-visit acceptor + /// \param[in] p The node to visit + /// \param[out] modified Indicator if the node was modified + /// \return Status of the node visit + Status PreAccept(NodePass *p, bool *modified) override; + + /// \brief Base-class override for NodePass visitor acceptor + /// \param[in] p The node to visit + /// \param[out] modified Indicator if the node was modified + /// \return Status of the node visit Status Accept(NodePass *p, bool *modified) override; // Op name getter // @return Name of the current Op std::string Name() const override { return "RepeatOp"; } + /// \brief Adds an operator to the repeat ops list of tracked leaf/eoe nodes + /// \param[in] eoe_op The input leaf/eoe operator to add to the list + void AddToEoeList(std::shared_ptr eoe_op) { eoe_ops_.push_back(std::move(eoe_op)); } + private: int32_t max_repeats_; // The number of repeats that the user requested int32_t repeat_count_; // A counter for the current number of executed repeats diff --git a/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc 
similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc index f86fcc602b7..0eb5f29eafa 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc @@ -25,14 +25,14 @@ #include #include -#include "dataset/core/config_manager.h" -#include "dataset/engine/datasetops/shuffle_op.h" -#include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/opt/pass.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/engine/datasetops/shuffle_op.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.h index 14b1e4511ee..86bea7cc77c 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.h @@ -24,11 +24,11 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/datasetops/pipeline_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include 
"minddata/dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/skip_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/skip_op.cc similarity index 93% rename from mindspore/ccsrc/dataset/engine/datasetops/skip_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/skip_op.cc index f6b0fe689c1..2fe8cbeaa63 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/skip_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/skip_op.cc @@ -17,12 +17,12 @@ #include #include -#include "dataset/core/config_manager.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/datasetops/skip_op.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/datasetops/skip_op.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/dataset/engine/datasetops/skip_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/skip_op.h similarity index 98% rename from mindspore/ccsrc/dataset/engine/datasetops/skip_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/skip_op.h index 4cb658b2a7e..a717d0efa4c 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/skip_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/skip_op.h @@ -19,7 +19,7 @@ #include #include #include -#include "dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/CMakeLists.txt 
b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/CMakeLists.txt new file mode 100644 index 00000000000..389e3f5af6d --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/CMakeLists.txt @@ -0,0 +1,32 @@ +add_subdirectory(sampler) +file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") +set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) + +set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES + io_block.cc + image_folder_op.cc + mnist_op.cc + coco_op.cc + cifar_op.cc + random_data_op.cc + celeba_op.cc + text_file_op.cc + clue_op.cc + ) + +set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES + ${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES} + mindrecord_op.cc + tf_reader_op.cc + ) + +if (ENABLE_PYTHON) + set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES + ${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES} + generator_op.cc + voc_op.cc + manifest_op.cc + ) +endif() + +add_library(engine-datasetops-source OBJECT ${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES}) \ No newline at end of file diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc index c7a4269a39b..9d7d5622a67 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc @@ -13,16 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/celeba_op.h" +#include "minddata/dataset/engine/datasetops/source/celeba_op.h" #include #include -#include "dataset/core/config_manager.h" -#include "dataset/util/path.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/kernels/image/image_utils.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/kernels/image/image_utils.h" namespace mindspore { namespace dataset { @@ -408,6 +409,12 @@ Status CelebAOp::Reset() { return Status::OK(); } +// Visitor accept method for NodePass +Status CelebAOp::Accept(NodePass *p, bool *modified) { + // Downcast shared pointer then call visitor + return p->RunOnNode(shared_from_base(), modified); +} + Status CelebAOp::ComputeColMap() { // Set the column name map (base class field) if (column_name_id_map_.empty()) { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.h similarity index 92% rename from mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.h index a6fa495a144..ef183f8e657 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.h @@ -24,12 +24,12 @@ #include #include -#include "dataset/util/status.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/util/queue.h" 
-#include "dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" #define CLOSE_FILE(attr_file, pairition_file) \ do { \ @@ -169,6 +169,12 @@ class CelebAOp : public ParallelOp, RandomAccessOp { // @return Status - The error code return Status AddIOBlock(std::unique_ptr *data_buffer); + /// \brief Base-class override for NodePass visitor acceptor + /// \param[in] p Pointer to the NodePass to be accepted + /// \param[out] modified Indicator if the node was changed at all + /// \return Status of the node visit + Status Accept(NodePass *p, bool *modified) override; + // Op name getter // @return Name of the current Op std::string Name() const { return "CelebAOp"; } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc index 8dd615a8c16..06be682bfd5 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/cifar_op.h" +#include "minddata/dataset/engine/datasetops/source/cifar_op.h" #include #include @@ -21,11 +21,12 @@ #include #include "common/utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" namespace mindspore { namespace dataset { @@ -450,6 +451,12 @@ Status CifarOp::CountTotalRows(const std::string &dir, bool isCIFAR10, int64_t * } } +// Visitor accept method for NodePass +Status CifarOp::Accept(NodePass *p, bool *modified) { + // Downcast shared pointer then call visitor + return p->RunOnNode(shared_from_base(), modified); +} + Status CifarOp::ComputeColMap() { // set the column name map (base class field) if (column_name_id_map_.empty()) { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.h similarity index 89% rename from mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.h index 917b23db941..60169f32bf5 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.h @@ -22,17 +22,17 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/engine/datasetops/source/io_block.h" 
-#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/util/path.h" -#include "dataset/util/queue.h" -#include "dataset/util/services.h" -#include "dataset/util/status.h" -#include "dataset/util/wait_post.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/wait_post.h" namespace mindspore { namespace dataset { @@ -155,6 +155,12 @@ class CifarOp : public ParallelOp, public RandomAccessOp { // @return static Status CountTotalRows(const std::string &dir, bool isCIFAR10, int64_t *count); + /// \brief Base-class override for NodePass visitor acceptor + /// \param[in] p Pointer to the NodePass to be accepted + /// \param[out] modified Indicator if the node was changed at all + /// \return Status of the node visit + Status Accept(NodePass *p, bool *modified) override; + // Op name getter // @return Name of the current Op std::string Name() const override { return "CifarOp"; } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.cc similarity index 98% rename from mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.cc index 9fceb6f3331..958514583ae 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * 
limitations under the License. */ -#include "dataset/engine/datasetops/source/clue_op.h" +#include "minddata/dataset/engine/datasetops/source/clue_op.h" #include #include @@ -21,12 +21,12 @@ #include #include -#include "dataset/core/config_manager.h" -#include "dataset/util/task_manager.h" -#include "dataset/engine/jagged_connector.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/datasetops/source/io_block.h" -#include "dataset/util/random.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/util/task_manager.h" +#include "minddata/dataset/engine/jagged_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.h similarity index 98% rename from mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.h index 487ed0d47f5..ab429561ec6 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.h @@ -23,9 +23,9 @@ #include #include -#include "dataset/util/auto_index.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/util/auto_index.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.cc rename to 
mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc index 92f67947695..daef2f284bb 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc @@ -13,17 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/source/coco_op.h" +#include "minddata/dataset/engine/datasetops/source/coco_op.h" #include #include #include #include "common/utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" namespace mindspore { namespace dataset { @@ -624,6 +625,12 @@ Status CocoOp::GetClassIndexing(const std::string &dir, const std::string &file, return Status::OK(); } +// Visitor accept method for NodePass +Status CocoOp::Accept(NodePass *p, bool *modified) { + // Downcast shared pointer then call visitor + return p->RunOnNode(shared_from_base(), modified); +} + Status CocoOp::ComputeColMap() { // Set the column name map (base class field) if (column_name_id_map_.empty()) { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.h similarity index 94% rename from mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.h index 37918537988..31070c26f5e 100644 --- 
a/mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.h @@ -23,17 +23,17 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/engine/datasetops/source/io_block.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/path.h" -#include "dataset/util/queue.h" -#include "dataset/util/status.h" -#include "dataset/util/wait_post.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/wait_post.h" namespace mindspore { namespace dataset { @@ -200,6 +200,12 @@ class CocoOp : public ParallelOp, public RandomAccessOp { static Status GetClassIndexing(const std::string &dir, const std::string &task_type, const std::string &task_mode, std::vector>> *output_class_indexing); + /// \brief Base-class override for NodePass visitor acceptor + /// \param[in] p Pointer to the NodePass to be accepted + /// \param[out] modified Indicator if the node was changed at all + /// \return Status of the node visit + Status Accept(NodePass *p, bool *modified) override; + private: // Initialize Sampler, calls sampler->Init() within // @return Status - The error code return diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc 
b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/generator_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/generator_op.cc index 36c221fc16b..773dfc78b66 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/generator_op.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/source/generator_op.h" +#include "minddata/dataset/engine/datasetops/source/generator_op.h" #include -#include "dataset/core/global_context.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/util/task_manager.h" -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/util/task_manager.h" +#include "minddata/dataset/engine/opt/pass.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/generator_op.h similarity index 95% rename from mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/generator_op.h index 98dd2d70a17..d09bfc3d716 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/generator_op.h @@ -22,11 +22,11 @@ #include #include #include -#include "dataset/core/data_type.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/data_schema.h" -#include 
"dataset/engine/datasetops/pipeline_op.h" -#include "dataset/util/wait_post.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/util/wait_post.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc index 837eae1e3cc..85839303db9 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc @@ -13,16 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/image_folder_op.h" +#include "minddata/dataset/engine/datasetops/source/image_folder_op.h" #include #include #include "common/utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.h similarity index 94% rename from mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.h index 6629fd6092e..153751d3c5e 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.h @@ -25,18 +25,18 @@ #include #include #include -#include "dataset/core/tensor.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/engine/datasetops/source/io_block.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/path.h" -#include "dataset/util/queue.h" -#include "dataset/util/services.h" -#include "dataset/util/status.h" -#include "dataset/util/wait_post.h" +#include "minddata/dataset/core/tensor.h" +#include 
"minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/wait_post.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/io_block.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/io_block.cc similarity index 98% rename from mindspore/ccsrc/dataset/engine/datasetops/source/io_block.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/io_block.cc index 0963f1a67a3..2b2542430bd 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/io_block.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/io_block.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" #include #include diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/io_block.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/io_block.h similarity index 98% rename from mindspore/ccsrc/dataset/engine/datasetops/source/io_block.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/io_block.h index 87b417f027e..df26aa1fc14 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/io_block.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/io_block.h @@ -19,8 +19,8 @@ #include #include -#include "dataset/util/auto_index.h" -#include "dataset/util/status.h" +#include "minddata/dataset/util/auto_index.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc index e65da8707b4..0476baf56f1 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/manifest_op.h" +#include "minddata/dataset/engine/datasetops/source/manifest_op.h" #include #include @@ -21,11 +21,12 @@ #include #include "common/utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" namespace mindspore { namespace dataset { @@ -416,6 +417,12 @@ Status ManifestOp::GetClassIndexing(const std::string &file, const py::dict &dic return Status::OK(); } +// Visitor accept method for NodePass +Status ManifestOp::Accept(NodePass *p, bool *modified) { + // Downcast shared pointer then call visitor + return p->RunOnNode(shared_from_base(), modified); +} + Status ManifestOp::ComputeColMap() { // Set the column name map (base class field) if (column_name_id_map_.empty()) { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.h similarity index 90% rename from mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.h index c180ea581da..bac8f04c94d 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.h @@ -22,17 +22,17 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include 
"dataset/engine/datasetops/source/io_block.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/queue.h" -#include "dataset/util/services.h" -#include "dataset/util/status.h" -#include "dataset/util/wait_post.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/wait_post.h" namespace mindspore { namespace dataset { @@ -172,6 +172,12 @@ class ManifestOp : public ParallelOp, public RandomAccessOp { static Status GetClassIndexing(const std::string &file, const py::dict &dict, const std::string &usage, std::map *output_class_indexing); + /// \brief Base-class override for NodePass visitor acceptor + /// \param[in] p Pointer to the NodePass to be accepted + /// \param[out] modified Indicator if the node was changed at all + /// \return Status of the node visit + Status Accept(NodePass *p, bool *modified) override; + // Op name getter // @return Name of the current Op std::string Name() const override { return "ManifestOp"; } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc index 2b9d010ebb3..cf1493eb78e 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc +++ 
b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/source/mindrecord_op.h" +#include "minddata/dataset/engine/datasetops/source/mindrecord_op.h" #include #include @@ -22,14 +22,14 @@ #include #include "common/utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/constants.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/datasetops/dataset_op.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.h similarity index 95% rename from mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.h index af405a8f5b5..367505b1720 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.h @@ -27,16 +27,16 @@ #include #include -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/engine/datasetops/source/io_block.h" -#include "dataset/util/queue.h" -#include "dataset/util/status.h" -#include 
"mindrecord/include/shard_column.h" -#include "mindrecord/include/shard_error.h" -#include "mindrecord/include/shard_reader.h" -#include "mindrecord/include/common/shard_utils.h" -#include "dataset/util/wait_post.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/status.h" +#include "minddata/mindrecord/include/shard_column.h" +#include "minddata/mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_reader.h" +#include "minddata/mindrecord/include/common/shard_utils.h" +#include "minddata/dataset/util/wait_post.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.cc index e98f8ae8c11..11ad18865e1 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.cc @@ -13,16 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/mnist_op.h" +#include "minddata/dataset/engine/datasetops/source/mnist_op.h" #include #include #include "common/utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" namespace mindspore { namespace dataset { @@ -428,6 +429,12 @@ Status MnistOp::CountTotalRows(const std::string &dir, int64_t *count) { return Status::OK(); } +// Visitor accept method for NodePass +Status MnistOp::Accept(NodePass *p, bool *modified) { + // Downcast shared pointer then call visitor + return p->RunOnNode(shared_from_base(), modified); +} + Status MnistOp::ComputeColMap() { // set the column name map (base class field) if (column_name_id_map_.empty()) { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.h similarity index 91% rename from mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.h index 9bd6276a11a..039f6b112f8 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.h @@ -23,16 +23,16 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/engine/datasetops/source/io_block.h" -#include 
"dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/util/path.h" -#include "dataset/util/queue.h" -#include "dataset/util/status.h" -#include "dataset/util/wait_post.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/wait_post.h" namespace mindspore { namespace dataset { @@ -152,6 +152,12 @@ class MnistOp : public ParallelOp, public RandomAccessOp { // @return static Status CountTotalRows(const std::string &dir, int64_t *count); + /// \brief Base-class override for NodePass visitor acceptor + /// \param[in] p Pointer to the NodePass to be accepted + /// \param[out] modified Indicator if the node was changed at all + /// \return Status of the node visit + Status Accept(NodePass *p, bool *modified) override; + // Op name getter // @return Name of the current Op std::string Name() const override { return "MnistOp"; } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.cc similarity index 95% rename from mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.cc index 3a865d8d691..46f3adfa62e 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.cc @@ -14,14 +14,15 @@ * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/random_data_op.h" +#include "minddata/dataset/engine/datasetops/source/random_data_op.h" #include #include -#include "dataset/engine/execution_tree.h" -#include "dataset/core/config_manager.h" -#include "dataset/util/random.h" -#include "dataset/util/wait_post.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/wait_post.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/opt/pass.h" namespace mindspore { namespace dataset { @@ -406,6 +407,12 @@ Status RandomDataOp::Reset() { return Status::OK(); } +// Visitor accept method for NodePass +Status RandomDataOp::Accept(NodePass *p, bool *modified) { + // Downcast shared pointer then call visitor + return p->RunOnNode(shared_from_base(), modified); +} + Status RandomDataOp::ComputeColMap() { // Extract the column name mapping from the schema and save it in the class. if (column_name_id_map_.empty()) { @@ -415,15 +422,5 @@ Status RandomDataOp::ComputeColMap() { } return Status::OK(); } - -// During tree prepare phase, operators may have specific post-operations to perform depending on -// their role. -Status RandomDataOp::PrepareNodePostAction() { - // Run common code from super class before adding RandomDataOp specific handling - RETURN_IF_NOT_OK(ParallelOp::PrepareNodePostAction()); - // Specific handling for this op, we need to do cache op work to assign the sampler to the cache. 
- RETURN_IF_NOT_OK(DatasetOp::SaveSamplerForCache(false)); - return Status::OK(); -} } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.h similarity index 94% rename from mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.h index b2af27dda3a..c77695439d1 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.h @@ -23,12 +23,12 @@ #include #include #include -#include "dataset/util/status.h" -#include "dataset/core/tensor.h" -#include "dataset/core/data_type.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/util/wait_post.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/util/wait_post.h" namespace mindspore { namespace dataset { @@ -203,12 +203,6 @@ class RandomDataOp : public ParallelOp { // @return Name of the current Op std::string Name() const override { return "RandomDataOp"; } - // During tree prepare phase, operators may have specific post-operations to perform depending on - // their role. - // @notes Derived versions of this function should always call it's superclass version first - // before providing their own implementations. - Status PrepareNodePostAction() override; - private: /** * The entry point code for when workers are launched @@ -266,6 +260,12 @@ class RandomDataOp : public ParallelOp { return ++buffer_id_; } + // Base-class override for NodePass visitor acceptor. 
+ // @param p - Pointer to the NodePass to be accepted. + // @param modified - Whether this node visit modified the pipeline. + // @return - Status of the node visit. + Status Accept(NodePass *p, bool *modified) override; + // Private function for computing the assignment of the column name map. // @return - Status Status ComputeColMap() override; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/CMakeLists.txt similarity index 51% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/CMakeLists.txt index 5209d9ba4ad..1335d987e8b 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/CMakeLists.txt @@ -1,12 +1,21 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) -add_library(engine-datasetops-source-sampler OBJECT + +set(DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES distributed_sampler.cc pk_sampler.cc - python_sampler.cc random_sampler.cc sampler.cc sequential_sampler.cc subset_random_sampler.cc weighted_random_sampler.cc ) + +if (ENABLE_PYTHON) + set(DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES + ${DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES} + python_sampler.cc + ) +endif() + +add_library(engine-datasetops-source-sampler OBJECT ${DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES}) diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc rename to 
mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.cc index 9f4a9cf55c6..2b5e7c67c89 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.cc @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" #include #include -#include "dataset/engine/data_buffer.h" -#include "dataset/util/random.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h index 7083580c6c0..76bcf052f92 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h @@ -21,7 +21,7 @@ #include #include -#include "dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/pk_sampler.cc similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc rename to 
mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/pk_sampler.cc index cd2cadb9ff1..770c24c8c57 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/pk_sampler.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/source/sampler/pk_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h" #include #include -#include "dataset/util/random.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h index cde8a75b5b3..aed61fa273d 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h @@ -22,7 +22,7 @@ #include #include -#include "dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/python_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/python_sampler.cc similarity index 98% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/python_sampler.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/python_sampler.cc index d204c55ce98..50c67bca6c9 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/python_sampler.cc +++ 
b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/python_sampler.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/source/sampler/python_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/python_sampler.h" #include diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/python_sampler.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/python_sampler.h similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/python_sampler.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/python_sampler.h index 7d653b2087f..61716feb940 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/python_sampler.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/python_sampler.h @@ -19,7 +19,7 @@ #include #include -#include "dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/random_sampler.cc similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/random_sampler.cc index db0a96ea3ae..998dee2a070 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/random_sampler.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/sampler/random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" #include #include #include -#include "dataset/util/random.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/random_sampler.h similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/random_sampler.h index b1c54eb98c4..6e21b088b9a 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/random_sampler.h @@ -20,7 +20,7 @@ #include #include -#include "dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.cc similarity index 98% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.cc index 1584166dc31..60d75d2eec7 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" #include @@ -89,6 +89,7 @@ void Sampler::Print(std::ostream &out, bool show_all) const { } } +#ifdef ENABLE_PYTHON Status Sampler::GetAllIdsThenReset(py::array *data) { std::unique_ptr db; std::shared_ptr sample_ids; @@ -120,6 +121,7 @@ Status Sampler::GetAllIdsThenReset(py::array *data) { RETURN_IF_NOT_OK(ResetSampler()); return Status::OK(); } +#endif Status Sampler::SetNumSamples(int64_t num_samples) { CHECK_FAIL_RETURN_UNEXPECTED(num_samples >= 0, "num_samples is negative"); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.h similarity index 95% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.h index 34c3cb79358..4cae935a425 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.h @@ -22,10 +22,10 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" namespace mindspore { namespace dataset { @@ -74,8 +74,11 @@ class Sampler { // @return - The error code return virtual Status GetNextSample(std::unique_ptr *out_buffer) = 0; +// This function only called by python layer. Not needed by Android. 
+#ifdef ENABLE_PYTHON // return all ids in one epoch as a numpy array, then call reset Status GetAllIdsThenReset(py::array *data); +#endif // for next epoch of sampleIds // @return - The error code return @@ -155,5 +158,4 @@ class Sampler { }; } // namespace dataset } // namespace mindspore - #endif // DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SAMPLER_H_ diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.cc similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.cc index 28598da55fb..1cc4ac831a4 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" #include #include diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h index 06f084fb7ad..c6ccd0d1eb3 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h @@ -19,7 +19,7 @@ #include #include -#include "dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc similarity index 95% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc index 08a623ed1ba..db2078795e7 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc @@ -13,16 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" #include #include #include #include -#include "dataset/core/config_manager.h" -#include "dataset/core/global_context.h" -#include "dataset/util/random.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h index ffc7cb17bc4..fccc15e57be 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h @@ -20,7 +20,7 @@ #include #include -#include "dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc index 6bf3d2d85e1..13863143c04 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc @@ -13,7 +13,7 @@ * 
See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" #include #include @@ -21,8 +21,8 @@ #include #include -#include "dataset/core/global_context.h" -#include "dataset/util/random.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h index 1fbe29ed80b..b1a531abe97 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h @@ -21,7 +21,7 @@ #include #include -#include "dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc similarity index 97% rename from mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc index 818b5ab3f43..c1f5b13a940 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc @@ -22,13 +22,13 @@ #include #include "common/utils.h" -#include 
"dataset/engine/datasetops/source/text_file_op.h" -#include "dataset/core/config_manager.h" -#include "dataset/util/task_manager.h" -#include "dataset/util/wait_post.h" -#include "dataset/util/random.h" -#include "dataset/engine/datasetops/source/io_block.h" -#include "dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/datasetops/source/text_file_op.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/util/task_manager.h" +#include "minddata/dataset/util/wait_post.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/engine/execution_tree.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.h index 5b787d4dad2..68c226ab80e 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.h @@ -23,14 +23,14 @@ #include #include -#include "dataset/util/status.h" -#include "dataset/util/auto_index.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/engine/datasetops/source/io_block.h" -#include "dataset/util/queue.h" -#include "dataset/util/wait_post.h" -#include "dataset/engine/jagged_connector.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/auto_index.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/wait_post.h" +#include 
"minddata/dataset/engine/jagged_connector.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc index 48f13ff7662..ae7907b5ceb 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/source/tf_reader_op.h" +#include "minddata/dataset/engine/datasetops/source/tf_reader_op.h" #include #include @@ -28,21 +28,21 @@ #include "proto/example.pb.h" #include "./securec.h" #include "common/utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/connector.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/source/io_block.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/jagged_connector.h" -#include "dataset/engine/opt/pass.h" -#include "dataset/util/path.h" -#include "dataset/util/queue.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" -#include "dataset/util/task_manager.h" -#include "dataset/util/wait_post.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/connector.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include 
"minddata/dataset/engine/jagged_connector.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/task_manager.h" +#include "minddata/dataset/util/wait_post.h" #include "utils/system/crc32c.h" namespace mindspore { @@ -1019,31 +1019,28 @@ Status TFReaderOp::ComputeColMap() { return Status::OK(); } +// Brief If a cache has been added into the ascendant tree over this tf reader, then the cache will be executing +// a sampler for fetching the data. As such, any options in the tf reader need to be reset to its defaults so +// that this tf reader will produce the full set of data into the cache. +void TFReaderOp::MakeSimpleProducer() { + device_id_ = 0; + num_devices_ = 1; + total_rows_ = 0; + shuffle_files_ = false; + equal_rows_per_shard_ = false; +} + // During tree prepare phase, operators may have specific post-operations to perform depending on // their role. Status TFReaderOp::PrepareNodePostAction() { // Run common code from super class before adding TFReaderOp specific handling RETURN_IF_NOT_OK(ParallelOp::PrepareNodePostAction()); - // Specific handling for this op, we need to do cache op work so assign the sampler to the cache - // TF is a special case because it can support file-based sharding/shuffling, or, if there - // is a cache, then it can also do row-based sampler using the sampler on the cache. - // Thus, pass true for random access op flag when saving the sampler. This is a special case, - // since usually a non-mappable dataset would pass false here. 
- RETURN_IF_NOT_OK(DatasetOp::SaveSamplerForCache(true)); - // Now that the sampler has been saved for the cache, we need to adjust the TFReaderOp to turn it into // a simpler producer of all data (no shuffling or sharding or anything) - if (BitTest(tree_->PrepareFlags(), ExecutionTree::kDePrepCache)) { - device_id_ = 0; - num_devices_ = 1; - total_rows_ = 0; - shuffle_files_ = false; - equal_rows_per_shard_ = false; - sampler_.reset(); // Normally SaveSampler code did this for us, but we passed in true above (See comment) - } else { + if (!BitTest(tree_->PrepareFlags(), ExecutionTree::kDePrepCache)) { // This sanity check had been delayed until now in the prepare loop. - // If we are not in a cache path, then we can validate the the file-based sharding config. + // If we are not in a cache path, then we can validate the file-based sharding config. // If we are in a cache path, there is no file-based sharding so the check is not correct in that // situation. if (!equal_rows_per_shard_ && dataset_files_list_.size() < static_cast(num_devices_)) { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.h index 9226c4c6c5a..c03f3957e9f 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.h @@ -25,12 +25,12 @@ #include #include -#include "dataset/util/wait_post.h" -#include "dataset/util/auto_index.h" -#include "dataset/util/status.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/util/wait_post.h" +#include "minddata/dataset/util/auto_index.h" +#include "minddata/dataset/util/status.h" +#include 
"minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" namespace dataengine { class Example; @@ -246,6 +246,11 @@ class TFReaderOp : public ParallelOp { // @return Vector of the input file names std::vector FileNames() { return dataset_files_list_; } + /// \Brief If a cache has been added into the ascendant tree over this tf reader, then the cache will be executing + /// a sampler for fetching the data. As such, any options in the tf reader need to be reset to its defaults so + /// that this tf reader will produce the full set of data into the cache. + void MakeSimpleProducer(); + // During tree prepare phase, operators may have specific post-operations to perform depending on // their role. // @notes Derived versions of this function should always call it's superclass version first diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc similarity index 92% rename from mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc index 958aa65b061..e90d423ef42 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc @@ -13,18 +13,19 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/datasetops/source/voc_op.h" +#include "minddata/dataset/engine/datasetops/source/voc_op.h" #include #include #include #include "./tinyxml2.h" #include "common/utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" using tinyxml2::XMLDocument; using tinyxml2::XMLElement; @@ -69,7 +70,7 @@ Status VOCOp::Builder::Build(std::shared_ptr *ptr) { RETURN_IF_NOT_OK(builder_schema_->AddColumn( ColDescriptor(std::string(kColumnImage), DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); RETURN_IF_NOT_OK(builder_schema_->AddColumn( - ColDescriptor(std::string(kColumnAnnotation), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); + ColDescriptor(std::string(kColumnAnnotation), DataType(DataType::DE_FLOAT32), TensorImpl::kFlexible, 1))); } *ptr = std::make_shared(builder_task_type_, builder_task_mode_, builder_dir_, builder_labels_to_read_, builder_num_workers_, builder_rows_per_buffer_, builder_op_connector_size_, @@ -308,30 +309,30 @@ Status VOCOp::ParseAnnotationBbox(const std::string &path) { } while (object != nullptr) { std::string label_name; - uint32_t xmin = 0, ymin = 0, xmax = 0, ymax = 0, truncated = 0, difficult = 0; + float xmin = 0.0, ymin = 0.0, xmax = 0.0, ymax = 0.0, truncated = 0.0, difficult = 0.0; XMLElement *name_node = object->FirstChildElement("name"); if (name_node != nullptr && name_node->GetText() != 0) label_name = name_node->GetText(); XMLElement *truncated_node = 
object->FirstChildElement("truncated"); - if (truncated_node != nullptr) truncated = truncated_node->UnsignedText(); + if (truncated_node != nullptr) truncated = truncated_node->FloatText(); XMLElement *difficult_node = object->FirstChildElement("difficult"); - if (difficult_node != nullptr) difficult = difficult_node->UnsignedText(); + if (difficult_node != nullptr) difficult = difficult_node->FloatText(); XMLElement *bbox_node = object->FirstChildElement("bndbox"); if (bbox_node != nullptr) { XMLElement *xmin_node = bbox_node->FirstChildElement("xmin"); - if (xmin_node != nullptr) xmin = xmin_node->UnsignedText(); + if (xmin_node != nullptr) xmin = xmin_node->FloatText(); XMLElement *ymin_node = bbox_node->FirstChildElement("ymin"); - if (ymin_node != nullptr) ymin = ymin_node->UnsignedText(); + if (ymin_node != nullptr) ymin = ymin_node->FloatText(); XMLElement *xmax_node = bbox_node->FirstChildElement("xmax"); - if (xmax_node != nullptr) xmax = xmax_node->UnsignedText(); + if (xmax_node != nullptr) xmax = xmax_node->FloatText(); XMLElement *ymax_node = bbox_node->FirstChildElement("ymax"); - if (ymax_node != nullptr) ymax = ymax_node->UnsignedText(); + if (ymax_node != nullptr) ymax = ymax_node->FloatText(); } else { RETURN_STATUS_UNEXPECTED("bndbox dismatch in " + path); } if (label_name != "" && (class_index_.empty() || class_index_.find(label_name) != class_index_.end()) && xmin > 0 && ymin > 0 && xmax > xmin && ymax > ymin) { - std::vector bbox_list = {xmin, ymin, xmax - xmin, ymax - ymin, truncated, difficult}; + std::vector bbox_list = {xmin, ymin, xmax - xmin, ymax - ymin, truncated, difficult}; bbox.emplace_back(std::make_pair(label_name, bbox_list)); label_index_[label_name] = 0; } @@ -376,17 +377,17 @@ Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &co Status VOCOp::ReadAnnotationToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr *tensor) { Bbox bbox_info = label_map_[path]; - std::vector 
bbox_row; + std::vector bbox_row; dsize_t bbox_column_num = 0, bbox_num = 0; for (auto box : bbox_info) { if (label_index_.find(box.first) != label_index_.end()) { - std::vector bbox; - if (class_index_.find(box.first) != class_index_.end()) { - bbox.emplace_back(class_index_[box.first]); - } else { - bbox.emplace_back(label_index_[box.first]); - } + std::vector bbox; bbox.insert(bbox.end(), box.second.begin(), box.second.end()); + if (class_index_.find(box.first) != class_index_.end()) { + bbox.push_back(static_cast(class_index_[box.first])); + } else { + bbox.push_back(static_cast(label_index_[box.first])); + } bbox_row.insert(bbox_row.end(), bbox.begin(), bbox.end()); if (bbox_column_num == 0) { bbox_column_num = static_cast(bbox.size()); @@ -449,6 +450,11 @@ Status VOCOp::GetClassIndexing(const std::string &dir, const std::string &task_t return Status::OK(); } +// Visitor accept method for NodePass +Status VOCOp::Accept(NodePass *p, bool *modified) { + // Downcast shared pointer then call visitor + return p->RunOnNode(shared_from_base(), modified); +} Status VOCOp::ComputeColMap() { // Set the column name map (base class field) diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.h similarity index 91% rename from mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.h index 89875341cae..e0c46c7a946 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.h @@ -22,17 +22,17 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/data_schema.h" -#include "dataset/engine/datasetops/parallel_op.h" -#include "dataset/engine/datasetops/source/io_block.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include 
"dataset/kernels/image/image_utils.h" -#include "dataset/util/path.h" -#include "dataset/util/queue.h" -#include "dataset/util/status.h" -#include "dataset/util/wait_post.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/source/io_block.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/wait_post.h" namespace mindspore { namespace dataset { @@ -40,7 +40,7 @@ namespace dataset { template class Queue; -using Bbox = std::vector>>; +using Bbox = std::vector>>; class VOCOp : public ParallelOp, public RandomAccessOp { public: @@ -205,6 +205,12 @@ class VOCOp : public ParallelOp, public RandomAccessOp { static Status GetClassIndexing(const std::string &dir, const std::string &task_type, const std::string &task_mode, const py::dict &dict, std::map *output_class_indexing); + /// \brief Base-class override for NodePass visitor acceptor + /// \param[in] p Pointer to the NodePass to be accepted + /// \param[out] modified Indicator if the node was changed at all + /// \return Status of the node visit + Status Accept(NodePass *p, bool *modified) override; + // Op name getter // @return Name of the current Op std::string Name() const override { return "VOCOp"; } diff --git a/mindspore/ccsrc/dataset/engine/datasetops/take_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/take_op.cc similarity index 91% rename from mindspore/ccsrc/dataset/engine/datasetops/take_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/take_op.cc index 8bc449cdc93..d1f07983f77 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/take_op.cc +++ 
b/mindspore/ccsrc/minddata/dataset/engine/datasetops/take_op.cc @@ -17,12 +17,12 @@ #include #include "common/utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/datasetops/take_op.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/datasetops/take_op.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pass.h" namespace mindspore { namespace dataset { @@ -127,12 +127,6 @@ Status TakeOp::FillBuffer(std::unique_ptr *buffer, std::unique_ptrAddToEOEOpStack(shared_from_this()); - return Status::OK(); -} - // Visitor accept method for NodePass Status TakeOp::Accept(NodePass *p, bool *modified) { // Downcast shared pointer then call visitor diff --git a/mindspore/ccsrc/dataset/engine/datasetops/take_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/take_op.h similarity index 90% rename from mindspore/ccsrc/dataset/engine/datasetops/take_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/take_op.h index 9619a4409d5..7f3f821bd8e 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/take_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/take_op.h @@ -19,7 +19,7 @@ #include #include #include -#include "dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" namespace mindspore { namespace dataset { @@ -78,12 +78,6 @@ class TakeOp : public PipelineOp { // @return Status - The error code return Status operator()() override; - // During tree prepare phase, operators may have specific post-operations to perform depending on - // their role. 
- // @notes Derived versions of this function should always call it's superclass version first - // before providing their own implementations. - Status PrepareNodePostAction() override; - // Base-class override for NodePass visitor acceptor. // @param p - Pointer to the NodePass to be accepted. // @param modified - Whether this node visit modified the pipeline. diff --git a/mindspore/ccsrc/dataset/engine/datasetops/zip_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/zip_op.cc rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.cc index 70bce16a89f..88019c30fc4 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/zip_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.cc @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/datasetops/zip_op.h" +#include "minddata/dataset/engine/datasetops/zip_op.h" #include #include -#include "dataset/core/constants.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/db_connector.h" -#include "dataset/engine/opt/pass.h" -#include "dataset/core/config_manager.h" -#include "dataset/core/global_context.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/global_context.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/zip_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/datasetops/zip_op.h rename to mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.h index fad3c22eaaf..c9466e26e2f 100644 --- 
a/mindspore/ccsrc/dataset/engine/datasetops/zip_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.h @@ -22,10 +22,10 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/engine/dataset_iterator.h" -#include "dataset/engine/datasetops/pipeline_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/dataset_iterator.h" +#include "minddata/dataset/engine/datasetops/pipeline_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/db_connector.h b/mindspore/ccsrc/minddata/dataset/engine/db_connector.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/db_connector.h rename to mindspore/ccsrc/minddata/dataset/engine/db_connector.h index 54909f51ba4..4a5c20bc126 100644 --- a/mindspore/ccsrc/dataset/engine/db_connector.h +++ b/mindspore/ccsrc/minddata/dataset/engine/db_connector.h @@ -18,9 +18,9 @@ #include #include -#include "dataset/engine/connector.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/core/constants.h" +#include "minddata/dataset/engine/connector.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/core/constants.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/execution_tree.cc b/mindspore/ccsrc/minddata/dataset/engine/execution_tree.cc similarity index 84% rename from mindspore/ccsrc/dataset/engine/execution_tree.cc rename to mindspore/ccsrc/minddata/dataset/engine/execution_tree.cc index 385722e2570..55dec24e794 100644 --- a/mindspore/ccsrc/dataset/engine/execution_tree.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/execution_tree.cc @@ -13,16 +13,19 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/execution_tree.h" #include #include -#include "dataset/engine/datasetops/dataset_op.h" -#include "dataset/engine/datasetops/shuffle_op.h" -#include "dataset/util/task_manager.h" -#include "dataset/engine/opt/pass.h" -#include "dataset/engine/opt/pre/removal_pass.h" -#include "dataset/engine/perf/profiling.h" -#include "dataset/engine/perf/monitor.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/engine/datasetops/shuffle_op.h" +#include "minddata/dataset/util/task_manager.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/engine/opt/pre/removal_pass.h" +#include "minddata/dataset/engine/opt/pre/cache_transform_pass.h" +#include "minddata/dataset/engine/opt/post/repeat_pass.h" +#include "mindspore/ccsrc/minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.h" +#include "minddata/dataset/engine/perf/profiling.h" +#include "minddata/dataset/engine/perf/monitor.h" namespace mindspore { namespace dataset { @@ -33,6 +36,7 @@ ExecutionTree::ExecutionTree() : id_count_(0) { prepare_flags_ = kDePrepNone; perf_monitor_ = std::make_unique(this); profiling_manager_ = std::make_unique(this); + optimize_ = common::GetEnv("OPTIMIZE") == "true" ? 
true : false; } // Destructor @@ -200,8 +204,10 @@ Status ExecutionTree::Prepare() { // Pre optimization compulsory transformation RETURN_IF_NOT_OK(this->PrepareTreePreAction()); - // Optimization transformation - RETURN_IF_NOT_OK(this->Optimize()); + // If optional optimizations are enabled + if (optimize_) { + RETURN_IF_NOT_OK(this->Optimize()); + } // Post optimization compulsory transformation RETURN_IF_NOT_OK(this->PrepareTreePostAction()); @@ -215,25 +221,47 @@ Status ExecutionTree::PrepareTreePreAction() { bool modified = false; std::vector> pre_actions; // Construct pre actions - MS_LOG(INFO) << "Running pre pass"; - pre_actions.push_back(std::make_unique(RemovalPass())); + MS_LOG(INFO) << "Running pre pass loops."; + pre_actions.push_back(std::make_unique()); + pre_actions.push_back(std::make_unique()); // Apply pre action passes for (auto &pass : pre_actions) { RETURN_IF_NOT_OK(pass->Run(this, &modified)); } + MS_LOG(INFO) << "Pre passes complete."; return Status::OK(); } Status ExecutionTree::PrepareTreePostAction() { // The tree is ready to be prepared. 
tree_state_ = kDeTStatePrepare; + + bool modified = false; + std::vector> post_actions; + // Construct pre actions + MS_LOG(INFO) << "Running post pass loops."; + post_actions.push_back(std::make_unique()); + + // Apply post action passes + for (auto &pass : post_actions) { + RETURN_IF_NOT_OK(pass->Run(this, &modified)); + } + MS_LOG(INFO) << "Post passes complete."; + return Status::OK(); } Status ExecutionTree::Optimize() { - // auto pp = new PrinterPass(); - // bool modified = false; - // pp->Run(this, &modified); + // Vector of optimizations, currently only 1, add more as necessary + std::vector> optimizations; + optimizations.push_back(std::make_unique()); + // vector of flags for each optimization + std::vector modified(optimizations.size(), false); + for (auto i = 0; i < optimizations.size(); i++) { + auto m = false; + optimizations[i]->Run(this, &m); + modified[i] = m; + } return Status::OK(); } @@ -280,31 +308,5 @@ Status ExecutionTree::PrepareNode(const std::shared_ptr &dataset_op) return Status::OK(); } - -// Adds an operator to the eoe operator stack during prepare phase. -void ExecutionTree::AddToEOEOpStack(std::shared_ptr dataset_op) { eoe_stack_.push(dataset_op); } - -// Pops an operator from the eoe operator stack during prepare phase. -std::shared_ptr ExecutionTree::PopFromEOEOpStack() { - std::shared_ptr top_op = nullptr; - if (!eoe_stack_.empty()) { - top_op = eoe_stack_.top(); - eoe_stack_.pop(); - } - return top_op; -} - -// Adds a sampler to the sampler stack during prepare phase. -void ExecutionTree::AddToSamplerStack(std::shared_ptr sampler) { sampler_stack_.push(sampler); } - -// Pops an operator from the sampler stack during prepare phase. 
-std::shared_ptr ExecutionTree::PopFromSamplerStack() { - std::shared_ptr top_sampler = nullptr; - if (!sampler_stack_.empty()) { - top_sampler = sampler_stack_.top(); - sampler_stack_.pop(); - } - return top_sampler; -} } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/execution_tree.h b/mindspore/ccsrc/minddata/dataset/engine/execution_tree.h similarity index 88% rename from mindspore/ccsrc/dataset/engine/execution_tree.h rename to mindspore/ccsrc/minddata/dataset/engine/execution_tree.h index 5ebfa539adf..b62bf8e85de 100644 --- a/mindspore/ccsrc/dataset/engine/execution_tree.h +++ b/mindspore/ccsrc/minddata/dataset/engine/execution_tree.h @@ -21,9 +21,9 @@ #include #include #include -#include "dataset/engine/datasetops/dataset_op.h" -#include "dataset/util/status.h" -#include "mindspore/ccsrc/dataset/engine/perf/profiling.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/util/status.h" +#include "mindspore/ccsrc/minddata/dataset/engine/perf/profiling.h" namespace mindspore { namespace dataset { @@ -87,6 +87,8 @@ class ExecutionTree { // @return Shared pointer to the current operator std::shared_ptr get() { return nodes_[ind_]; } + bool operator==(const Iterator &rhs) { return nodes_[ind_] == rhs.nodes_[rhs.ind_]; } + bool operator!=(const Iterator &rhs) { return nodes_[ind_] != rhs.nodes_[rhs.ind_]; } int32_t NumNodes() { return nodes_.size(); } @@ -200,24 +202,6 @@ class ExecutionTree { // @return Status - The error code return Status PrepareNode(const std::shared_ptr &dataset_op); - /// Adds an operator to the eoe operator stack during prepare phase. - /// \param op - The dataset op to work add to eoe stack - /// \return Status - The error code return - void AddToEOEOpStack(std::shared_ptr dataset_op); - - /// Pops an operator from the eoe operator stack during prepare phase. 
- /// \return shared_ptr to the popped operator - std::shared_ptr PopFromEOEOpStack(); - - /// Adds a sampler to the sampler stack during prepare phase. - /// \param samplerop - The dataset op to work add to eoe stack - /// \return Status - The error code return - void AddToSamplerStack(std::shared_ptr sampler); - - /// Pops an operator from the sampler stack during prepare phase. - /// \return shared_ptr to the popped operator - std::shared_ptr PopFromSamplerStack(); - // Return the pointer to the TaskGroup // @return raw pointer to the TaskGroup TaskGroup *AllTasks() const { return tg_.get(); } @@ -232,6 +216,21 @@ class ExecutionTree { // Getter for profiling manager, no ownership ProfilingManager *GetProfilingManager() { return profiling_manager_.get(); } + // Set optional optimization if tree has not been prepared yet + Status SetOptimize(bool value) { + if (tree_state_ != kDeTStateInit && tree_state_ != kDeTStateBuilding) { + std::string optimize = (optimize_ == true) ? "true" : "false"; + std::string msg = "Tree has already been prepared with OPTIMIZE set to " + optimize; + RETURN_STATUS_UNEXPECTED(msg); + } else { + optimize_ = value; + return Status::OK(); + } + } + + // Optional optimizations status + bool OptimizationEnabled() const { return optimize_; } + private: // A helper functions for doing the recursive printing // @param dataset_op - The dataset op to print @@ -248,9 +247,10 @@ class ExecutionTree { TreeState tree_state_; // Tracking the current tree state std::unique_ptr perf_monitor_; // Performance Monitor std::unique_ptr profiling_manager_; // Profiling manager - std::stack> eoe_stack_; // A stack used during prepare phase - std::stack> sampler_stack_; // A stack used during prepare phase + bool optimize_; // Flag to enable optional optimizations }; + +inline bool operator==(const ExecutionTree::Iterator &lhs, const ExecutionTree::Iterator &rhs) { return lhs == rhs; } } // namespace dataset } // namespace mindspore diff --git 
a/mindspore/ccsrc/dataset/engine/gnn/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/gnn/CMakeLists.txt similarity index 100% rename from mindspore/ccsrc/dataset/engine/gnn/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/engine/gnn/CMakeLists.txt diff --git a/mindspore/ccsrc/dataset/engine/gnn/edge.h b/mindspore/ccsrc/minddata/dataset/engine/gnn/edge.h similarity index 95% rename from mindspore/ccsrc/dataset/engine/gnn/edge.h rename to mindspore/ccsrc/minddata/dataset/engine/gnn/edge.h index 47314d97c24..c62c088bab7 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/edge.h +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/edge.h @@ -20,9 +20,9 @@ #include #include -#include "dataset/util/status.h" -#include "dataset/engine/gnn/feature.h" -#include "dataset/engine/gnn/node.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/engine/gnn/feature.h" +#include "minddata/dataset/engine/gnn/node.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/gnn/feature.cc b/mindspore/ccsrc/minddata/dataset/engine/gnn/feature.cc similarity index 94% rename from mindspore/ccsrc/dataset/engine/gnn/feature.cc rename to mindspore/ccsrc/minddata/dataset/engine/gnn/feature.cc index e4579478217..dba4a6fa609 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/feature.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/feature.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/gnn/feature.h" +#include "minddata/dataset/engine/gnn/feature.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/gnn/feature.h b/mindspore/ccsrc/minddata/dataset/engine/gnn/feature.h similarity index 94% rename from mindspore/ccsrc/dataset/engine/gnn/feature.h rename to mindspore/ccsrc/minddata/dataset/engine/gnn/feature.h index 7ce5967fbd3..0d7eba1009c 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/feature.h +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/feature.h @@ -18,8 +18,8 @@ #include -#include "dataset/core/tensor.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/gnn/graph.cc b/mindspore/ccsrc/minddata/dataset/engine/gnn/graph.cc similarity index 82% rename from mindspore/ccsrc/dataset/engine/gnn/graph.cc rename to mindspore/ccsrc/minddata/dataset/engine/gnn/graph.cc index a143bd4e386..9083eb4c4b3 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/graph.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/graph.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/gnn/graph.h" +#include "minddata/dataset/engine/gnn/graph.h" #include #include @@ -21,8 +21,8 @@ #include #include -#include "dataset/core/tensor_shape.h" -#include "dataset/util/random.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { @@ -125,13 +125,8 @@ Status Graph::GetNodesFromEdges(const std::vector &edge_list, std::s Status Graph::GetAllNeighbors(const std::vector &node_list, NodeType neighbor_type, std::shared_ptr *out) { - if (node_list.empty()) { - RETURN_STATUS_UNEXPECTED("Input node_list is empty."); - } - if (node_type_map_.find(neighbor_type) == node_type_map_.end()) { - std::string err_msg = "Invalid neighbor type:" + std::to_string(neighbor_type); - RETURN_STATUS_UNEXPECTED(err_msg); - } + CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty."); + RETURN_IF_NOT_OK(CheckNeighborType(neighbor_type)); std::vector> neighbors; size_t max_neighbor_num = 0; @@ -161,6 +156,14 @@ Status Graph::CheckSamplesNum(NodeIdType samples_num) { return Status::OK(); } +Status Graph::CheckNeighborType(NodeType neighbor_type) { + if (node_type_map_.find(neighbor_type) == node_type_map_.end()) { + std::string err_msg = "Invalid neighbor type:" + std::to_string(neighbor_type); + RETURN_STATUS_UNEXPECTED(err_msg); + } + return Status::OK(); +} + Status Graph::GetSampledNeighbors(const std::vector &node_list, const std::vector &neighbor_nums, const std::vector &neighbor_types, std::shared_ptr *out) { @@ -171,10 +174,7 @@ Status Graph::GetSampledNeighbors(const std::vector &node_list, RETURN_IF_NOT_OK(CheckSamplesNum(num)); } for (const auto &type : neighbor_types) { - if (node_type_map_.find(type) == node_type_map_.end()) { - std::string err_msg = "Invalid neighbor type:" + std::to_string(type); - RETURN_STATUS_UNEXPECTED(err_msg); - } + RETURN_IF_NOT_OK(CheckNeighborType(type)); } std::vector> neighbors_vec(node_list.size()); for (size_t 
node_idx = 0; node_idx < node_list.size(); ++node_idx) { @@ -228,44 +228,36 @@ Status Graph::GetNegSampledNeighbors(const std::vector &node_list, N NodeType neg_neighbor_type, std::shared_ptr *out) { CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty."); RETURN_IF_NOT_OK(CheckSamplesNum(samples_num)); - if (node_type_map_.find(neg_neighbor_type) == node_type_map_.end()) { - std::string err_msg = "Invalid neighbor type:" + std::to_string(neg_neighbor_type); - RETURN_STATUS_UNEXPECTED(err_msg); - } + RETURN_IF_NOT_OK(CheckNeighborType(neg_neighbor_type)); - std::vector> neighbors_vec; - neighbors_vec.resize(node_list.size()); + std::vector> neg_neighbors_vec; + neg_neighbors_vec.resize(node_list.size()); for (size_t node_idx = 0; node_idx < node_list.size(); ++node_idx) { std::shared_ptr node; RETURN_IF_NOT_OK(GetNodeByNodeId(node_list[node_idx], &node)); std::vector neighbors; RETURN_IF_NOT_OK(node->GetAllNeighbors(neg_neighbor_type, &neighbors)); - std::unordered_set exclude_node; + std::unordered_set exclude_nodes; std::transform(neighbors.begin(), neighbors.end(), - std::insert_iterator>(exclude_node, exclude_node.begin()), + std::insert_iterator>(exclude_nodes, exclude_nodes.begin()), [](const NodeIdType node) { return node; }); - auto itr = node_type_map_.find(neg_neighbor_type); - if (itr == node_type_map_.end()) { - std::string err_msg = "Invalid node type:" + std::to_string(neg_neighbor_type); - RETURN_STATUS_UNEXPECTED(err_msg); + const std::vector &all_nodes = node_type_map_[neg_neighbor_type]; + neg_neighbors_vec[node_idx].emplace_back(node->id()); + if (all_nodes.size() > exclude_nodes.size()) { + while (neg_neighbors_vec[node_idx].size() < samples_num + 1) { + RETURN_IF_NOT_OK(NegativeSample(all_nodes, exclude_nodes, samples_num - neg_neighbors_vec[node_idx].size(), + &neg_neighbors_vec[node_idx])); + } } else { - neighbors_vec[node_idx].emplace_back(node->id()); - if (itr->second.size() > exclude_node.size()) { - while 
(neighbors_vec[node_idx].size() < samples_num + 1) { - RETURN_IF_NOT_OK(NegativeSample(itr->second, exclude_node, samples_num - neighbors_vec[node_idx].size(), - &neighbors_vec[node_idx])); - } - } else { - MS_LOG(DEBUG) << "There are no negative neighbors. node_id:" << node->id() - << " neg_neighbor_type:" << neg_neighbor_type; - // If there are no negative neighbors, they are filled with kDefaultNodeId - for (int32_t i = 0; i < samples_num; ++i) { - neighbors_vec[node_idx].emplace_back(kDefaultNodeId); - } + MS_LOG(DEBUG) << "There are no negative neighbors. node_id:" << node->id() + << " neg_neighbor_type:" << neg_neighbor_type; + // If there are no negative neighbors, they are filled with kDefaultNodeId + for (int32_t i = 0; i < samples_num; ++i) { + neg_neighbors_vec[node_idx].emplace_back(kDefaultNodeId); } } } - RETURN_IF_NOT_OK(CreateTensorByVector(neighbors_vec, DataType(DataType::DE_INT32), out)); + RETURN_IF_NOT_OK(CreateTensorByVector(neg_neighbors_vec, DataType(DataType::DE_INT32), out)); return Status::OK(); } @@ -280,8 +272,19 @@ Status Graph::RandomWalk(const std::vector &node_list, const std::ve } Status Graph::GetNodeDefaultFeature(FeatureType feature_type, std::shared_ptr *out_feature) { - auto itr = default_feature_map_.find(feature_type); - if (itr == default_feature_map_.end()) { + auto itr = default_node_feature_map_.find(feature_type); + if (itr == default_node_feature_map_.end()) { + std::string err_msg = "Invalid feature type:" + std::to_string(feature_type); + RETURN_STATUS_UNEXPECTED(err_msg); + } else { + *out_feature = itr->second; + } + return Status::OK(); +} + +Status Graph::GetEdgeDefaultFeature(FeatureType feature_type, std::shared_ptr *out_feature) { + auto itr = default_edge_feature_map_.find(feature_type); + if (itr == default_edge_feature_map_.end()) { std::string err_msg = "Invalid feature type:" + std::to_string(feature_type); RETURN_STATUS_UNEXPECTED(err_msg); } else { @@ -295,7 +298,7 @@ Status Graph::GetNodeFeature(const 
std::shared_ptr &nodes, const std::ve if (!nodes || nodes->Size() == 0) { RETURN_STATUS_UNEXPECTED("Input nodes is empty"); } - CHECK_FAIL_RETURN_UNEXPECTED(!feature_types.empty(), "Inpude feature_types is empty"); + CHECK_FAIL_RETURN_UNEXPECTED(!feature_types.empty(), "Input feature_types is empty"); TensorRow tensors; for (const auto &f_type : feature_types) { std::shared_ptr default_feature; @@ -340,6 +343,45 @@ Status Graph::GetNodeFeature(const std::shared_ptr &nodes, const std::ve Status Graph::GetEdgeFeature(const std::shared_ptr &edges, const std::vector &feature_types, TensorRow *out) { + if (!edges || edges->Size() == 0) { + RETURN_STATUS_UNEXPECTED("Input edges is empty"); + } + CHECK_FAIL_RETURN_UNEXPECTED(!feature_types.empty(), "Input feature_types is empty"); + TensorRow tensors; + for (const auto &f_type : feature_types) { + std::shared_ptr default_feature; + // If no feature can be obtained, fill in the default value + RETURN_IF_NOT_OK(GetEdgeDefaultFeature(f_type, &default_feature)); + + TensorShape shape(default_feature->Value()->shape()); + auto shape_vec = edges->shape().AsVector(); + dsize_t size = std::accumulate(shape_vec.begin(), shape_vec.end(), 1, std::multiplies()); + shape = shape.PrependDim(size); + std::shared_ptr fea_tensor; + RETURN_IF_NOT_OK( + Tensor::CreateTensor(&fea_tensor, TensorImpl::kFlexible, shape, default_feature->Value()->type(), nullptr)); + + dsize_t index = 0; + for (auto edge_itr = edges->begin(); edge_itr != edges->end(); ++edge_itr) { + std::shared_ptr edge; + RETURN_IF_NOT_OK(GetEdgeByEdgeId(*edge_itr, &edge)); + std::shared_ptr feature; + if (!edge->GetFeatures(f_type, &feature).IsOk()) { + feature = default_feature; + } + RETURN_IF_NOT_OK(fea_tensor->InsertTensor({index}, feature->Value())); + index++; + } + + TensorShape reshape(edges->shape()); + for (auto s : default_feature->Value()->shape().AsVector()) { + reshape = reshape.AppendDim(s); + } + RETURN_IF_NOT_OK(fea_tensor->Reshape(reshape)); + 
fea_tensor->Squeeze(); + tensors.push_back(fea_tensor); + } + *out = std::move(tensors); return Status::OK(); } @@ -387,6 +429,7 @@ Status Graph::GetMetaInfo(MetaInfo *meta_info) { return Status::OK(); } +#ifdef ENABLE_PYTHON Status Graph::GraphInfo(py::dict *out) { MetaInfo meta_info; RETURN_IF_NOT_OK(GetMetaInfo(&meta_info)); @@ -398,6 +441,7 @@ Status Graph::GraphInfo(py::dict *out) { (*out)["edge_feature_type"] = py::cast(meta_info.edge_feature_type); return Status::OK(); } +#endif Status Graph::LoadNodeAndEdge() { GraphLoader gl(dataset_file_, num_workers_); @@ -405,7 +449,8 @@ Status Graph::LoadNodeAndEdge() { RETURN_IF_NOT_OK(gl.InitAndLoad()); // get all maps RETURN_IF_NOT_OK(gl.GetNodesAndEdges(&node_id_map_, &edge_id_map_, &node_type_map_, &edge_type_map_, - &node_feature_map_, &edge_feature_map_, &default_feature_map_)); + &node_feature_map_, &edge_feature_map_, &default_node_feature_map_, + &default_edge_feature_map_)); return Status::OK(); } @@ -420,18 +465,33 @@ Status Graph::GetNodeByNodeId(NodeIdType id, std::shared_ptr *node) { return Status::OK(); } +Status Graph::GetEdgeByEdgeId(EdgeIdType id, std::shared_ptr *edge) { + auto itr = edge_id_map_.find(id); + if (itr == edge_id_map_.end()) { + std::string err_msg = "Invalid edge id:" + std::to_string(id); + RETURN_STATUS_UNEXPECTED(err_msg); + } else { + *edge = itr->second; + } + return Status::OK(); +} + Graph::RandomWalkBase::RandomWalkBase(Graph *graph) : graph_(graph), step_home_param_(1.0), step_away_param_(1.0), default_node_(-1), num_walks_(1), num_workers_(1) {} Status Graph::RandomWalkBase::Build(const std::vector &node_list, const std::vector &meta_path, float step_home_param, float step_away_param, const NodeIdType default_node, int32_t num_walks, int32_t num_workers) { + CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty."); node_list_ = node_list; if (meta_path.empty() || meta_path.size() > kMaxNumWalks) { std::string err_msg = "Failed, meta path required between 
1 and " + std::to_string(kMaxNumWalks) + ". The size of input path is " + std::to_string(meta_path.size()); RETURN_STATUS_UNEXPECTED(err_msg); } + for (const auto &type : meta_path) { + RETURN_IF_NOT_OK(graph_->CheckNeighborType(type)); + } meta_path_ = meta_path; if (step_home_param < kGnnEpsilon || step_away_param < kGnnEpsilon) { std::string err_msg = "Failed, step_home_param and step_away_param required greater than " + @@ -439,6 +499,18 @@ Status Graph::RandomWalkBase::Build(const std::vector &node_list, co ", step_away_param: " + std::to_string(step_away_param); RETURN_STATUS_UNEXPECTED(err_msg); } + if (default_node < -1) { + std::string err_msg = "Failed, default_node required to be greater or equal to -1."; + RETURN_STATUS_UNEXPECTED(err_msg); + } + if (num_walks <= 0) { + std::string err_msg = "Failed, num_walks parameter required to be greater than 0"; + RETURN_STATUS_UNEXPECTED(err_msg); + } + if (num_workers <= 0) { + std::string err_msg = "Failed, num_workers parameter required to be greater than 0"; + RETURN_STATUS_UNEXPECTED(err_msg); + } step_home_param_ = step_home_param; step_away_param_ = step_away_param; default_node_ = default_node; @@ -488,15 +560,10 @@ Status Graph::RandomWalkBase::Node2vecWalk(const NodeIdType &start_node, std::ve } Status Graph::RandomWalkBase::SimulateWalk(std::vector> *walks) { - // Repeatedly simulate random walks from each node - std::vector permutation(node_list_.size()); - std::iota(permutation.begin(), permutation.end(), 0); for (int32_t i = 0; i < num_walks_; i++) { - unsigned seed = std::chrono::system_clock::now().time_since_epoch().count(); - std::shuffle(permutation.begin(), permutation.end(), std::default_random_engine(seed)); - for (const auto &i_perm : permutation) { + for (const auto &node : node_list_) { std::vector walk; - RETURN_IF_NOT_OK(Node2vecWalk(node_list_[i_perm], &walk)); + RETURN_IF_NOT_OK(Node2vecWalk(node, &walk)); walks->push_back(walk); } } diff --git 
a/mindspore/ccsrc/dataset/engine/gnn/graph.h b/mindspore/ccsrc/minddata/dataset/engine/gnn/graph.h similarity index 90% rename from mindspore/ccsrc/dataset/engine/gnn/graph.h rename to mindspore/ccsrc/minddata/dataset/engine/gnn/graph.h index 344a6c6bf21..76930d91f28 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/graph.h +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/graph.h @@ -25,13 +25,13 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_row.h" -#include "dataset/engine/gnn/graph_loader.h" -#include "dataset/engine/gnn/feature.h" -#include "dataset/engine/gnn/node.h" -#include "dataset/engine/gnn/edge.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_row.h" +#include "minddata/dataset/engine/gnn/graph_loader.h" +#include "minddata/dataset/engine/gnn/feature.h" +#include "minddata/dataset/engine/gnn/node.h" +#include "minddata/dataset/engine/gnn/edge.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -140,8 +140,10 @@ class Graph { // @return Status - The error code return Status GetMetaInfo(MetaInfo *meta_info); +#ifdef ENABLE_PYTHON // Return meta information to python layer Status GraphInfo(py::dict *out); +#endif Status Init(); @@ -181,7 +183,7 @@ class Graph { float step_away_param_; // Inout hyper parameter. Default is 1.0 NodeIdType default_node_; - int32_t num_walks_; // Number of walks per source. Default is 10 + int32_t num_walks_; // Number of walks per source. Default is 1 int32_t num_workers_; // The number of worker threads. 
Default is 1 }; @@ -211,12 +213,24 @@ class Graph { // @return Status - The error code return Status GetNodeDefaultFeature(FeatureType feature_type, std::shared_ptr *out_feature); + // Get the default feature of a edge + // @param FeatureType feature_type - + // @param std::shared_ptr *out_feature - Returned feature + // @return Status - The error code return + Status GetEdgeDefaultFeature(FeatureType feature_type, std::shared_ptr *out_feature); + // Find node object using node id // @param NodeIdType id - // @param std::shared_ptr *node - Returned node object // @return Status - The error code return Status GetNodeByNodeId(NodeIdType id, std::shared_ptr *node); + // Find edge object using edge id + // @param EdgeIdType id - + // @param std::shared_ptr *edge - Returned edge object + // @return Status - The error code return + Status GetEdgeByEdgeId(EdgeIdType id, std::shared_ptr *edge); + // Negative sampling // @param std::vector &input_data - The data set to be sampled // @param std::unordered_set &exclude_data - Data to be excluded @@ -228,6 +242,8 @@ class Graph { Status CheckSamplesNum(NodeIdType samples_num); + Status CheckNeighborType(NodeType neighbor_type); + std::string dataset_file_; int32_t num_workers_; // The number of worker threads std::mt19937 rnd_; @@ -242,7 +258,8 @@ class Graph { std::unordered_map> node_feature_map_; std::unordered_map> edge_feature_map_; - std::unordered_map> default_feature_map_; + std::unordered_map> default_node_feature_map_; + std::unordered_map> default_edge_feature_map_; }; } // namespace gnn } // namespace dataset diff --git a/mindspore/ccsrc/dataset/engine/gnn/graph_loader.cc b/mindspore/ccsrc/minddata/dataset/engine/gnn/graph_loader.cc similarity index 87% rename from mindspore/ccsrc/dataset/engine/gnn/graph_loader.cc rename to mindspore/ccsrc/minddata/dataset/engine/gnn/graph_loader.cc index 6504d088bf2..9d2c6211f40 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/graph_loader.cc +++ 
b/mindspore/ccsrc/minddata/dataset/engine/gnn/graph_loader.cc @@ -18,11 +18,11 @@ #include #include -#include "dataset/engine/gnn/graph_loader.h" -#include "mindspore/ccsrc/mindrecord/include/shard_error.h" -#include "dataset/engine/gnn/local_edge.h" -#include "dataset/engine/gnn/local_node.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/engine/gnn/graph_loader.h" +#include "mindspore/ccsrc/minddata/mindrecord/include/shard_error.h" +#include "minddata/dataset/engine/gnn/local_edge.h" +#include "minddata/dataset/engine/gnn/local_node.h" +#include "minddata/dataset/util/task_manager.h" using ShardTuple = std::vector, mindspore::mindrecord::json>>; @@ -41,7 +41,8 @@ GraphLoader::GraphLoader(std::string mr_filepath, int32_t num_workers) Status GraphLoader::GetNodesAndEdges(NodeIdMap *n_id_map, EdgeIdMap *e_id_map, NodeTypeMap *n_type_map, EdgeTypeMap *e_type_map, NodeFeatureMap *n_feature_map, - EdgeFeatureMap *e_feature_map, DefaultFeatureMap *default_feature_map) { + EdgeFeatureMap *e_feature_map, DefaultNodeFeatureMap *default_node_feature_map, + DefaultEdgeFeatureMap *default_edge_feature_map) { for (std::deque> &dq : n_deques_) { while (dq.empty() == false) { std::shared_ptr node_ptr = dq.front(); @@ -70,7 +71,7 @@ Status GraphLoader::GetNodesAndEdges(NodeIdMap *n_id_map, EdgeIdMap *e_id_map, N for (auto &itr : *n_type_map) itr.second.shrink_to_fit(); for (auto &itr : *e_type_map) itr.second.shrink_to_fit(); - MergeFeatureMaps(n_feature_map, e_feature_map, default_feature_map); + MergeFeatureMaps(n_feature_map, e_feature_map, default_node_feature_map, default_edge_feature_map); return Status::OK(); } @@ -81,7 +82,8 @@ Status GraphLoader::InitAndLoad() { e_deques_.resize(num_workers_); n_feature_maps_.resize(num_workers_); e_feature_maps_.resize(num_workers_); - default_feature_maps_.resize(num_workers_); + default_node_feature_maps_.resize(num_workers_); + default_edge_feature_maps_.resize(num_workers_); TaskGroup vg; shard_reader_ = 
std::make_unique(); @@ -109,7 +111,7 @@ Status GraphLoader::InitAndLoad() { Status GraphLoader::LoadNode(const std::vector &col_blob, const mindrecord::json &col_jsn, std::shared_ptr *node, NodeFeatureMap *feature_map, - DefaultFeatureMap *default_feature) { + DefaultNodeFeatureMap *default_feature) { NodeIdType node_id = col_jsn["first_id"]; NodeType node_type = static_cast(col_jsn["type"]); (*node) = std::make_shared(node_id, node_type); @@ -133,7 +135,7 @@ Status GraphLoader::LoadNode(const std::vector &col_blob, const mindrec Status GraphLoader::LoadEdge(const std::vector &col_blob, const mindrecord::json &col_jsn, std::shared_ptr *edge, EdgeFeatureMap *feature_map, - DefaultFeatureMap *default_feature) { + DefaultEdgeFeatureMap *default_feature) { EdgeIdType edge_id = col_jsn["first_id"]; EdgeType edge_type = static_cast(col_jsn["type"]); NodeIdType src_id = col_jsn["second_id"], dst_id = col_jsn["third_id"]; @@ -214,13 +216,13 @@ Status GraphLoader::WorkerEntry(int32_t worker_id) { std::string attr = col_jsn["attribute"]; if (attr == "n") { std::shared_ptr node_ptr; - RETURN_IF_NOT_OK( - LoadNode(col_blob, col_jsn, &node_ptr, &(n_feature_maps_[worker_id]), &default_feature_maps_[worker_id])); + RETURN_IF_NOT_OK(LoadNode(col_blob, col_jsn, &node_ptr, &(n_feature_maps_[worker_id]), + &default_node_feature_maps_[worker_id])); n_deques_[worker_id].emplace_back(node_ptr); } else if (attr == "e") { std::shared_ptr edge_ptr; - RETURN_IF_NOT_OK( - LoadEdge(col_blob, col_jsn, &edge_ptr, &(e_feature_maps_[worker_id]), &default_feature_maps_[worker_id])); + RETURN_IF_NOT_OK(LoadEdge(col_blob, col_jsn, &edge_ptr, &(e_feature_maps_[worker_id]), + &default_edge_feature_maps_[worker_id])); e_deques_[worker_id].emplace_back(edge_ptr); } else { MS_LOG(WARNING) << "attribute:" << attr << " is neither edge nor node."; @@ -233,7 +235,8 @@ Status GraphLoader::WorkerEntry(int32_t worker_id) { } void GraphLoader::MergeFeatureMaps(NodeFeatureMap *n_feature_map, EdgeFeatureMap 
*e_feature_map, - DefaultFeatureMap *default_feature_map) { + DefaultNodeFeatureMap *default_node_feature_map, + DefaultEdgeFeatureMap *default_edge_feature_map) { for (int wkr_id = 0; wkr_id < num_workers_; wkr_id++) { for (auto &m : n_feature_maps_[wkr_id]) { for (auto &n : m.second) (*n_feature_map)[m.first].insert(n); @@ -241,8 +244,11 @@ void GraphLoader::MergeFeatureMaps(NodeFeatureMap *n_feature_map, EdgeFeatureMap for (auto &m : e_feature_maps_[wkr_id]) { for (auto &n : m.second) (*e_feature_map)[m.first].insert(n); } - for (auto &m : default_feature_maps_[wkr_id]) { - (*default_feature_map)[m.first] = m.second; + for (auto &m : default_node_feature_maps_[wkr_id]) { + (*default_node_feature_map)[m.first] = m.second; + } + for (auto &m : default_edge_feature_maps_[wkr_id]) { + (*default_edge_feature_map)[m.first] = m.second; } } n_feature_maps_.clear(); diff --git a/mindspore/ccsrc/dataset/engine/gnn/graph_loader.h b/mindspore/ccsrc/minddata/dataset/engine/gnn/graph_loader.h similarity index 82% rename from mindspore/ccsrc/dataset/engine/gnn/graph_loader.h rename to mindspore/ccsrc/minddata/dataset/engine/gnn/graph_loader.h index 0ad54bae6d8..f7f9245b8ac 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/graph_loader.h +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/graph_loader.h @@ -24,14 +24,14 @@ #include #include -#include "dataset/core/data_type.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/gnn/feature.h" -#include "dataset/engine/gnn/graph.h" -#include "dataset/engine/gnn/node.h" -#include "dataset/engine/gnn/edge.h" -#include "dataset/util/status.h" -#include "mindrecord/include/shard_reader.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/gnn/feature.h" +#include "minddata/dataset/engine/gnn/graph.h" +#include "minddata/dataset/engine/gnn/node.h" +#include "minddata/dataset/engine/gnn/edge.h" +#include "minddata/dataset/util/status.h" +#include 
"minddata/mindrecord/include/shard_reader.h" namespace mindspore { namespace dataset { namespace gnn { @@ -43,7 +43,8 @@ using NodeTypeMap = std::unordered_map>; using EdgeTypeMap = std::unordered_map>; using NodeFeatureMap = std::unordered_map>; using EdgeFeatureMap = std::unordered_map>; -using DefaultFeatureMap = std::unordered_map>; +using DefaultNodeFeatureMap = std::unordered_map>; +using DefaultEdgeFeatureMap = std::unordered_map>; // this class interfaces with the underlying storage format (mindrecord) // it returns raw nodes and edges via GetNodesAndEdges @@ -63,7 +64,7 @@ class GraphLoader { // random order. src_node and dst_node in Edge are node_id only with -1 as type. // features attached to each node and edge are expected to be filled correctly Status GetNodesAndEdges(NodeIdMap *, EdgeIdMap *, NodeTypeMap *, EdgeTypeMap *, NodeFeatureMap *, EdgeFeatureMap *, - DefaultFeatureMap *); + DefaultNodeFeatureMap *, DefaultEdgeFeatureMap *); private: // @@ -77,19 +78,19 @@ class GraphLoader { // @param mindrecord::json &jsn - contains raw data // @param std::shared_ptr *node - return value // @param NodeFeatureMap *feature_map - - // @param DefaultFeatureMap *default_feature - + // @param DefaultNodeFeatureMap *default_feature - // @return Status - the status code Status LoadNode(const std::vector &blob, const mindrecord::json &jsn, std::shared_ptr *node, - NodeFeatureMap *feature_map, DefaultFeatureMap *default_feature); + NodeFeatureMap *feature_map, DefaultNodeFeatureMap *default_feature); // @param std::vector &blob - contains data in blob field in mindrecord // @param mindrecord::json &jsn - contains raw data // @param std::shared_ptr *edge - return value, the edge ptr, edge is not yet connected // @param FeatureMap *feature_map - // @param DefaultFeatureMap *default_feature - + // @param DefaultEdgeFeatureMap *default_feature - // @return Status - the status code Status LoadEdge(const std::vector &blob, const mindrecord::json &jsn, std::shared_ptr 
*edge, - EdgeFeatureMap *feature_map, DefaultFeatureMap *default_feature); + EdgeFeatureMap *feature_map, DefaultEdgeFeatureMap *default_feature); // @param std::string key - column name // @param std::vector &blob - contains data in blob field in mindrecord @@ -108,7 +109,7 @@ class GraphLoader { std::shared_ptr *tensor); // merge NodeFeatureMap and EdgeFeatureMap of each worker into 1 - void MergeFeatureMaps(NodeFeatureMap *, EdgeFeatureMap *, DefaultFeatureMap *); + void MergeFeatureMaps(NodeFeatureMap *, EdgeFeatureMap *, DefaultNodeFeatureMap *, DefaultEdgeFeatureMap *); const int32_t num_workers_; std::atomic_int row_id_; @@ -118,7 +119,8 @@ class GraphLoader { std::vector>> e_deques_; std::vector n_feature_maps_; std::vector e_feature_maps_; - std::vector default_feature_maps_; + std::vector default_node_feature_maps_; + std::vector default_edge_feature_maps_; const std::vector keys_; }; } // namespace gnn diff --git a/mindspore/ccsrc/dataset/engine/gnn/local_edge.cc b/mindspore/ccsrc/minddata/dataset/engine/gnn/local_edge.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/gnn/local_edge.cc rename to mindspore/ccsrc/minddata/dataset/engine/gnn/local_edge.cc index 7465b689d5d..642c73eed3a 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/local_edge.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/local_edge.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/gnn/local_edge.h" +#include "minddata/dataset/engine/gnn/local_edge.h" #include diff --git a/mindspore/ccsrc/dataset/engine/gnn/local_edge.h b/mindspore/ccsrc/minddata/dataset/engine/gnn/local_edge.h similarity index 91% rename from mindspore/ccsrc/dataset/engine/gnn/local_edge.h rename to mindspore/ccsrc/minddata/dataset/engine/gnn/local_edge.h index a34fc003739..d112972f8f7 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/local_edge.h +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/local_edge.h @@ -20,10 +20,10 @@ #include #include -#include "dataset/util/status.h" -#include "dataset/engine/gnn/edge.h" -#include "dataset/engine/gnn/feature.h" -#include "dataset/engine/gnn/node.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/engine/gnn/edge.h" +#include "minddata/dataset/engine/gnn/feature.h" +#include "minddata/dataset/engine/gnn/node.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/gnn/local_node.cc b/mindspore/ccsrc/minddata/dataset/engine/gnn/local_node.cc similarity index 96% rename from mindspore/ccsrc/dataset/engine/gnn/local_node.cc rename to mindspore/ccsrc/minddata/dataset/engine/gnn/local_node.cc index c829f8e8caf..8eaf9bb7163 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/local_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/local_node.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/engine/gnn/local_node.h" +#include "minddata/dataset/engine/gnn/local_node.h" #include #include #include -#include "dataset/engine/gnn/edge.h" -#include "dataset/util/random.h" +#include "minddata/dataset/engine/gnn/edge.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/gnn/local_node.h b/mindspore/ccsrc/minddata/dataset/engine/gnn/local_node.h similarity index 95% rename from mindspore/ccsrc/dataset/engine/gnn/local_node.h rename to mindspore/ccsrc/minddata/dataset/engine/gnn/local_node.h index bc069d073fd..9c122931e70 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/local_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/local_node.h @@ -20,9 +20,9 @@ #include #include -#include "dataset/util/status.h" -#include "dataset/engine/gnn/node.h" -#include "dataset/engine/gnn/feature.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/engine/gnn/node.h" +#include "minddata/dataset/engine/gnn/feature.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/gnn/node.h b/mindspore/ccsrc/minddata/dataset/engine/gnn/node.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/gnn/node.h rename to mindspore/ccsrc/minddata/dataset/engine/gnn/node.h index 282f8567971..a7c803fee25 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/node.h @@ -20,8 +20,8 @@ #include #include -#include "dataset/util/status.h" -#include "dataset/engine/gnn/feature.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/engine/gnn/feature.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/jagged_connector.h b/mindspore/ccsrc/minddata/dataset/engine/jagged_connector.h similarity index 93% rename from mindspore/ccsrc/dataset/engine/jagged_connector.h rename to mindspore/ccsrc/minddata/dataset/engine/jagged_connector.h index 
2058c542a8d..cee0b7abf3e 100644 --- a/mindspore/ccsrc/dataset/engine/jagged_connector.h +++ b/mindspore/ccsrc/minddata/dataset/engine/jagged_connector.h @@ -20,10 +20,10 @@ #include #include #include -#include "dataset/engine/connector.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/util/status.h" -#include "dataset/core/constants.h" +#include "minddata/dataset/engine/connector.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/core/constants.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/opt/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/opt/CMakeLists.txt similarity index 71% rename from mindspore/ccsrc/dataset/engine/opt/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/engine/opt/CMakeLists.txt index 080d968cfce..0ab1fb79253 100644 --- a/mindspore/ccsrc/dataset/engine/opt/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/CMakeLists.txt @@ -2,7 +2,11 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc" set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) add_library(engine-opt OBJECT pass.cc + post/repeat_pass.cc + pre/cache_pass.cc + pre/cache_transform_pass.cc pre/removal_nodes.cc pre/removal_pass.cc + optional/tensor_op_fusion_pass.cc util/printer_pass.cc ) diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.cc new file mode 100644 index 00000000000..d8ce2dd863b --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.cc @@ -0,0 +1,58 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.h" +#include "minddata/dataset/kernels/image/decode_op.h" +#include "minddata/dataset/engine/datasetops/map_op.h" +#include "minddata/dataset/kernels/image/random_crop_decode_resize_op.h" + +namespace mindspore { +namespace dataset { + +Status TensorOpFusionPass::RunOnNode(std::shared_ptr node, bool *modified) { + // Most primitive pattern: DecodeOp immediately followed by RandomCropAndResizeOp + // Abstract into a more general member function that can find any pattern, expressed + // by regular expressions, for instance. + // Add a list of optimisation policies. 
For now, just this lambda + auto FindPattern = [](auto &tfuncs) { + auto it = + std::find_if(tfuncs.begin(), tfuncs.end(), [](const auto &tf) -> bool { return tf->Name() == kDecodeOp; }); + auto next = it + 1; + if (it != tfuncs.end() && next != tfuncs.end() && (*next)->Name() == kRandomCropAndResizeOp) { + return it; + } else { + return tfuncs.end(); + } + }; + + auto &tfuncs = node->TFuncs(); + auto it = FindPattern(tfuncs); + if (it != tfuncs.end()) { + auto next = it + 1; + auto op = static_cast(next->get()); + *it = std::static_pointer_cast(std::make_shared(*op)); + tfuncs.erase(next); + } + if (modified != nullptr) { + *modified = true; + } else { + RETURN_STATUS_UNEXPECTED("modified is nullptr"); + } + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.h b/mindspore/ccsrc/minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.h new file mode 100644 index 00000000000..a109af396c9 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.h @@ -0,0 +1,38 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef DATASET_TENSOR_OP_FUSION_PASS_H_ +#define DATASET_TENSOR_OP_FUSION_PASS_H_ + +#include +#include "minddata/dataset/engine/opt/pass.h" + +namespace mindspore { +namespace dataset { + +/// \class TensorOpFusionPass tensor_op_fusion_pass.h +/// \brief And optional optimization pass identifying and fusing +/// tensor ops within MapOp +class TensorOpFusionPass : public NodePass { + /// \brief Identifies and fuses tensor ops within MapOp + /// \param[in] node The node being visited + /// \param[inout] *modified indicates whether the node has been visited + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_TENSOR_OP_FUSION_PASS_H_ diff --git a/mindspore/ccsrc/dataset/engine/opt/pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/pass.cc similarity index 53% rename from mindspore/ccsrc/dataset/engine/opt/pass.cc rename to mindspore/ccsrc/minddata/dataset/engine/opt/pass.cc index 27769f056b8..4a8bbaf38f0 100644 --- a/mindspore/ccsrc/dataset/engine/opt/pass.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pass.cc @@ -14,23 +14,35 @@ * limitations under the License. 
*/ -#include "dataset/engine/opt/pass.h" -#include "dataset/engine/datasetops/batch_op.h" -#include "dataset/engine/datasetops/dataset_op.h" -#include "dataset/engine/datasetops/device_queue_op.h" -#include "dataset/engine/datasetops/map_op.h" -#include "dataset/engine/datasetops/project_op.h" -#include "dataset/engine/datasetops/rename_op.h" -#include "dataset/engine/datasetops/filter_op.h" -#include "dataset/engine/datasetops/repeat_op.h" -#include "dataset/engine/datasetops/skip_op.h" -#include "dataset/engine/datasetops/shuffle_op.h" -#include "dataset/engine/datasetops/source/generator_op.h" -#include "dataset/engine/datasetops/source/mindrecord_op.h" -#include "dataset/engine/datasetops/source/tf_reader_op.h" -#include "dataset/engine/datasetops/source/image_folder_op.h" -#include "dataset/engine/datasetops/take_op.h" -#include "dataset/engine/datasetops/zip_op.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/engine/datasetops/batch_op.h" +#include "minddata/dataset/engine/datasetops/cache_op.h" +#include "minddata/dataset/engine/datasetops/cache_merge_op.h" +#include "minddata/dataset/engine/datasetops/cache_lookup_op.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/engine/datasetops/device_queue_op.h" +#include "minddata/dataset/engine/datasetops/map_op.h" +#include "minddata/dataset/engine/datasetops/project_op.h" +#include "minddata/dataset/engine/datasetops/rename_op.h" +#include "minddata/dataset/engine/datasetops/repeat_op.h" +#include "minddata/dataset/engine/datasetops/skip_op.h" +#include "minddata/dataset/engine/datasetops/shuffle_op.h" +#include "minddata/dataset/engine/datasetops/source/celeba_op.h" +#include "minddata/dataset/engine/datasetops/source/cifar_op.h" +#include "minddata/dataset/engine/datasetops/source/coco_op.h" +#include "minddata/dataset/engine/datasetops/source/manifest_op.h" +#include "minddata/dataset/engine/datasetops/source/mindrecord_op.h" +#include 
"minddata/dataset/engine/datasetops/source/mnist_op.h" +#include "minddata/dataset/engine/datasetops/source/random_data_op.h" +#include "minddata/dataset/engine/datasetops/source/tf_reader_op.h" +#include "minddata/dataset/engine/datasetops/source/voc_op.h" +#ifdef ENABLE_PYTHON +#include "minddata/dataset/engine/datasetops/filter_op.h" +#include "minddata/dataset/engine/datasetops/source/generator_op.h" +#endif +#include "minddata/dataset/engine/datasetops/source/image_folder_op.h" +#include "minddata/dataset/engine/datasetops/take_op.h" +#include "minddata/dataset/engine/datasetops/zip_op.h" namespace mindspore { namespace dataset { @@ -111,11 +123,6 @@ Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { return RunOnNode(std::static_pointer_cast(node), modified); } -Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { - // Fallback to base class visitor by default - return RunOnNode(std::static_pointer_cast(node), modified); -} - Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { // Fallback to base class visitor by default return RunOnNode(std::static_pointer_cast(node), modified); @@ -126,11 +133,6 @@ Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { return RunOnNode(std::static_pointer_cast(node), modified); } -Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { - // Fallback to base class visitor by default - return RunOnNode(std::static_pointer_cast(node), modified); -} - Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { // Fallback to base class visitor by default return RunOnNode(std::static_pointer_cast(node), modified); @@ -141,6 +143,23 @@ Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { return RunOnNode(std::static_pointer_cast(node), modified); } +#ifdef ENABLE_PYTHON +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); 
+} + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} +#endif + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} + Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { // Fallback to base class visitor by default return RunOnNode(std::static_pointer_cast(node), modified); @@ -160,5 +179,70 @@ Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) // Fallback to base class visitor by default return RunOnNode(std::static_pointer_cast(node), modified); } + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return 
RunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::PreRunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return PreRunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::PreRunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return PreRunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::PreRunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return PreRunOnNode(std::static_pointer_cast(node), modified); +} } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/opt/pass.h b/mindspore/ccsrc/minddata/dataset/engine/opt/pass.h similarity index 79% rename from mindspore/ccsrc/dataset/engine/opt/pass.h rename to mindspore/ccsrc/minddata/dataset/engine/opt/pass.h index 129c2fab37c..845ab34d664 100644 --- a/mindspore/ccsrc/dataset/engine/opt/pass.h +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pass.h @@ -20,8 +20,8 @@ #include #include -#include "dataset/engine/execution_tree.h" -#include "dataset/util/status.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -33,18 +33,24 @@ class ProjectOp; class RenameOp; -class FilterOp; - class SkipOp; class ShuffleOp; -class GeneratorOp; - class MindRecordOp; 
class TFReaderOp; +#ifdef ENABLE_PYTHON +class FilterOp; + +class GeneratorOp; +#endif + +class RandomDataOp; + +class RepeatOp; + class TakeOp; class ZipOp; @@ -53,6 +59,24 @@ class DeviceQueueOp; class ImageFolderOp; +class CacheOp; + +class MnistOp; + +class ManifestOp; + +class CifarOp; + +class VOCOp; + +class CocoOp; + +class CelebAOp; + +class CacheMergeOp; + +class CacheLookupOp; + // The base class Pass is the basic unit of tree transformation. // The actual implementation of the passes will be derived from here. class Pass : public std::enable_shared_from_this { @@ -122,26 +146,56 @@ class NodePass : public Pass { virtual Status RunOnNode(std::shared_ptr node, bool *modified); - virtual Status RunOnNode(std::shared_ptr node, bool *modified); - virtual Status RunOnNode(std::shared_ptr node, bool *modified); virtual Status RunOnNode(std::shared_ptr node, bool *modified); - virtual Status RunOnNode(std::shared_ptr node, bool *modified); - virtual Status RunOnNode(std::shared_ptr node, bool *modified); virtual Status RunOnNode(std::shared_ptr node, bool *modified); +#ifdef ENABLE_PYTHON + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + + virtual Status RunOnNode(std::shared_ptr node, bool *modified); +#endif + + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + virtual Status RunOnNode(std::shared_ptr node, bool *modified); virtual Status RunOnNode(std::shared_ptr node, bool *modified); virtual Status RunOnNode(std::shared_ptr node, bool *modified); + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + + virtual 
Status RunOnNode(std::shared_ptr node, bool *modified); + + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + + virtual Status PreRunOnNode(std::shared_ptr node, bool *modified); + + virtual Status PreRunOnNode(std::shared_ptr node, bool *modified); + + virtual Status PreRunOnNode(std::shared_ptr node, bool *modified); + private: // Helper function to perform DFS visit Status DFSNodeVisit(std::shared_ptr node, bool *modified); diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.cc new file mode 100644 index 00000000000..59a3f71c53b --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.cc @@ -0,0 +1,161 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include "minddata/dataset/engine/opt/post/repeat_pass.h" +#include "minddata/dataset/engine/datasetops/repeat_op.h" +#include "minddata/dataset/engine/datasetops/cache_op.h" +#include "minddata/dataset/engine/datasetops/cache_lookup_op.h" +#include "minddata/dataset/engine/datasetops/cache_merge_op.h" + +namespace mindspore { +namespace dataset { + +RepeatPass::RepeatPass() : is_repeated_(false), nested_repeats_(0), is_merge_(false), cache_lookup_(nullptr) {} + +// Identifies the subtree below this node as being in a repeated path of the tree. +Status RepeatPass::PreRunOnNode(std::shared_ptr node, bool *modified) { + // If we are already repeated, then this is a nested repeat. + if (is_repeated_) { + nested_repeats_++; + } + is_repeated_ = true; + return Status::OK(); +} + +// Identifies the subtree below this node as being in a cache merge path +Status RepeatPass::PreRunOnNode(std::shared_ptr node, bool *modified) { + // Turn on the flag that we're under a merge op + is_merge_ = true; + return Status::OK(); +} + +// Hooks up any identified eoe nodes under this repeat. +Status RepeatPass::RunOnNode(std::shared_ptr node, bool *modified) { + // Pop the leaf ops from the save-area stack and add them to the repeat op's eoe node tracking + std::shared_ptr leaf_op = PopFromEOEOpStack(); + while (leaf_op != nullptr) { + node->AddToEoeList(leaf_op); + leaf_op = PopFromEOEOpStack(); + } + + // We are a repeat op in the descendant tree of a merge op, then we take the saved lookup up + // and add it to the list of eoe/leaf ops for the repeat, removing it from the save area. + if (is_merge_ && cache_lookup_) { + cache_lookup_->set_control_flag(DatasetOp::kDeOpRepeated); + node->AddToEoeList(std::move(cache_lookup_)); + } + + // If we are a nested repeat, then we add ourself to the repeat stack for the next one above us. + // A nested repeat acts like an eoe/leaf for the repeat in the ascendant tree. 
+ if (nested_repeats_ > 0) { + node->set_control_flag(DatasetOp::kDeOpRepeated); + AddToEOEOpStack(node); + nested_repeats_--; + } + + // If we are not nested, or we were the top-most repeat, now we clear the flag + if (nested_repeats_ == 0) { + is_repeated_ = false; + } + + return Status::OK(); +} + +// CacheOp removes previous leaf ops and replaces them with itself +Status RepeatPass::RunOnNode(std::shared_ptr node, bool *modified) { + if (is_repeated_) { + node->set_control_flag(DatasetOp::kDeOpRepeated); + // if we are a cache within a repeat path of the tree, then there will be + // eoe-generating ops in the eoe op stack in the tree. They are flagged as such so that the + // repeat or epoch ctrl operators can work with them for repeat activity during runtime. + // However, since a cache is present: + // - unflag those ops as being repeated ops + // - remove them from the eoe op stack so that repeat op above in the tree won't know about them + // - add ourself (the cache op), as an eoe op + // We do this so that those old leafs become 1-time use (up to eoe), never repeated. Instead + // the repeating behaviours shall be invoked against the cache op. + std::shared_ptr leaf_op = PopFromEOEOpStack(); + while (leaf_op != nullptr) { + leaf_op->ClearControlFlag(DatasetOp::kDeOpLastRepeat); + leaf_op->ClearControlFlag(DatasetOp::kDeOpRepeated); + leaf_op = PopFromEOEOpStack(); + } + AddToEOEOpStack(std::static_pointer_cast(node)); + } + + return Status::OK(); +} + +// All operators have a flag that might be set related to the repeat and any leaf nodes need to be set up +// for use with a controlling repeat above it. 
+Status RepeatPass::RunOnNode(std::shared_ptr node, bool *modified) { + // If we are in a repeat path, then set our repeated flag + if (is_repeated_) { + node->set_control_flag(DatasetOp::kDeOpRepeated); + + // if we are a leaf node then save ourself in a stack for the repeat operator above us + if (node->IsLeaf()) { + AddToEOEOpStack(node); + } + } + return Status::OK(); +} + +// Turns off the tracking for operations under merge op +Status RepeatPass::RunOnNode(std::shared_ptr node, bool *modified) { + // Setting the flag is needed since we didn't call the base class DatasetOp version + if (is_repeated_) node->set_control_flag(DatasetOp::kDeOpRepeated); + is_merge_ = false; + cache_lookup_.reset(); // If a repeat op did not consume this then it's no longer needed + return Status::OK(); +} + +// Saves the lookup up in case it needs to be referenced by a repeat +Status RepeatPass::RunOnNode(std::shared_ptr node, bool *modified) { + if (!node->IsLeaf()) { + // By definition, the CacheLookup must be a leaf op. Make that clear here. + RETURN_STATUS_UNEXPECTED("CacheLookupOp must be a leaf node!"); + } + + // If we are in a repeat path already, then there must be a repeat above the merge op + // In this case, we naturally are a repeating leaf op so add the required setup for leafs under repeat here. + if (is_repeated_) { + node->set_control_flag(DatasetOp::kDeOpRepeated); + AddToEOEOpStack(node); + } else { + // save the lookup op. There could be a repeat in the cache miss leg of the merge op, in which case we + // may still need to be flagged as a repeating leaf. We can't decide that here though, so save ourself + // into the pass so that the decision can be made during the processing of the cache miss leg of the merge. 
+ cache_lookup_ = std::static_pointer_cast(node); + } + return Status::OK(); +} + +// Adds an operator to the eoe operator stack save area +void RepeatPass::AddToEOEOpStack(std::shared_ptr dataset_op) { eoe_stack_.push(dataset_op); } + +// Pops an operator from the eoe operator stack save area +std::shared_ptr RepeatPass::PopFromEOEOpStack() { + std::shared_ptr top_op = nullptr; + if (!eoe_stack_.empty()) { + top_op = eoe_stack_.top(); + eoe_stack_.pop(); + } + return top_op; +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.h b/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.h new file mode 100644 index 00000000000..9b733e23292 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.h @@ -0,0 +1,98 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_ENGINE_OPT_PASS_POST_REPEAT_PASS_ +#define DATASET_ENGINE_OPT_PASS_POST_REPEAT_PASS_ + +#include +#include +#include +#include "minddata/dataset/engine/opt/pass.h" + +namespace mindspore { +namespace dataset { + +/// \class RepeatPass repeat_pass.h +/// \brief This is a NodePass who's job is to perform setup actions for RepeatOps. A RepeatOp needs to have references +/// to the eoe-producing (typically leaf) nodes underneath it. 
+class RepeatPass : public NodePass { + public: + /// \brief Constructor + RepeatPass(); + + /// \brief Identifies the subtree below this node as being in a repeated path of the tree. + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status PreRunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Identifies the subtree below this node as being in a cache merge path + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status PreRunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Hooks up any identified eoe nodes under this repeat. + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief CacheOp removes previous leaf ops and replaces them with itself + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Turns of the tracking for operations under merge op + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Saves the lookup up in case it needs to be referenced by a repeat + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief All operators have a flag that might be set related to the 
repeat and any leaf nodes need to be set up + /// for use with a controlling repeat above it. + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + private: + /// \brief Adds an operator to the eoe operator stack save area + /// \param op - The dataset op to work add to eoe stack + /// \return Status - The error code return + void AddToEOEOpStack(std::shared_ptr dataset_op); + + /// \brief Pops an operator from the eoe operator stack save area + /// \return shared_ptr to the popped operator + std::shared_ptr PopFromEOEOpStack(); + + bool is_repeated_; // T/F if we are processing under a repeat + bool is_merge_; // T/F if we are processing under a cache merge op + int32_t nested_repeats_; // A counter for nested repeats + std::stack> eoe_stack_; // A save area for leaf/eoe ops + std::shared_ptr cache_lookup_; // A save area for a cache lookup op +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_ENGINE_OPT_PASS_POST_REPEAT_PASS_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_pass.cc new file mode 100644 index 00000000000..09b5f14a17b --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_pass.cc @@ -0,0 +1,181 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "minddata/dataset/engine/opt/pre/cache_pass.h" +#include "minddata/dataset/engine/opt/pre/cache_transform_pass.h" +#include "minddata/dataset/engine/datasetops/cache_op.h" +#include "minddata/dataset/engine/datasetops/source/celeba_op.h" +#include "minddata/dataset/engine/datasetops/source/generator_op.h" +#include "minddata/dataset/engine/datasetops/source/manifest_op.h" +#include "minddata/dataset/engine/datasetops/source/mnist_op.h" +#include "minddata/dataset/engine/datasetops/source/voc_op.h" +#include "minddata/dataset/engine/datasetops/source/cifar_op.h" +#include "minddata/dataset/engine/datasetops/source/coco_op.h" +#include "minddata/dataset/engine/datasetops/source/image_folder_op.h" +#include "minddata/dataset/engine/datasetops/source/random_data_op.h" +#include "minddata/dataset/engine/datasetops/source/tf_reader_op.h" +#include "minddata/dataset/engine/datasetops/source/mindrecord_op.h" + +namespace mindspore { +namespace dataset { + +// Constructor +CachePass::CachePass(CacheTransformPass *transform_pass) + : transform_pass_(transform_pass), is_caching_(false), leaf_op_(nullptr) {} + +// Identifies the subtree below this node as a cached descendant tree. +Status CachePass::PreRunOnNode(std::shared_ptr node, bool *modified) { + *modified = false; + MS_LOG(INFO) << "Cache transform pass: CacheOp found, identified descendant tree."; + if (is_caching_) { + RETURN_STATUS_UNEXPECTED("Nested cache operations is not supported!"); + } + is_caching_ = true; + return Status::OK(); +} + +// Resets the tracking of the cache within the tree and assigns the operators that will be involved in a cache +// transformation +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + *modified = false; + is_caching_ = false; // We a no longer in a cache subtree. clear the flag. 
+ if (leaf_op_) { + MS_LOG(INFO) << "Cache transform pass: Set up transformation nodes for mappable cache."; + // Assign the leaf op into the transform pass, using move to null our copy of it, and also assign the cache op, + // using base class pointers. + transform_pass_->AddMappableCacheOperators(std::move(leaf_op_), node); + } else { + // If there was no leaf_op set, then this is a non-mappable scenario. + + if (sampler_) { + // Grab the sampler that was saved from the leaf and plug it into the cache op + node->SetSampler(std::move(sampler_)); + MS_LOG(INFO) << "Cache transform pass: Set up cache sampler from non-mappable leaf."; + } else { + // We're a cache op but no sampler was saved from leaf, so create a default sampler + int64_t num_samples = 0; + int64_t start_index = 0; + sampler_ = std::make_shared(num_samples, start_index); + node->SetSampler(std::move(sampler_)); + MS_LOG(INFO) << "Cache transform pass: Creating default sequential sampler for cache op."; + } + + // Get the computed check sum from all ops in our cache path below us and ask the cache op to create it's cache + uint32_t cache_crc = DatasetOp::GenerateCRC(node); + RETURN_IF_NOT_OK(node->CreateCache(cache_crc)); + } + + return Status::OK(); +} + +// Common code for mappable leaf setup. +Status CachePass::MappableCacheLeafSetup(std::shared_ptr leaf_op) { + // If a leaf has already been assigned, then we have more than one leaf inside this cache descendant tree. + if (is_caching_ && leaf_op_) { + RETURN_STATUS_UNEXPECTED("There is currently no support for multiple leaf nodes under cache."); + } + + // If we are a leaf in the caching path, then save this leaf. + if (is_caching_) { + MS_LOG(DEBUG) << "Cache transform pass: Mappable leaf in a cache descendant tree detected"; + leaf_op_ = std::move(leaf_op); + } + return Status::OK(); +} + +// Common code for non mappable leaf setup. 
+Status CachePass::NonMappableCacheLeafSetup(std::shared_ptr leaf_op) { + // If a leaf has already been assigned, then we have more than one leaf inside this cache descendant tree. + if (is_caching_ && leaf_op_) { + RETURN_STATUS_UNEXPECTED("There is currently no support for multiple leaf nodes under cache."); + } + + // Sampler for non mapable dataset only works if there is a downstream cache. Remove it from the leaf + // as save it for use by cache op in ascendant tree. + if (is_caching_) { + RETURN_IF_NOT_OK(leaf_op->FetchRemoveSampler(&sampler_)); + MS_LOG(DEBUG) << "Cache transform pass: Non mappable leaf in a cache descendant tree detected"; + } else { + // If we are a non-mappable leaf and are not in a cache tree, then this sampler is not used so we can + // remove it here. The leaf itself will provide it's own methods of fetching the data (not sampler-based) + std::shared_ptr sampler_from_leaf; + RETURN_IF_NOT_OK(leaf_op->FetchRemoveSampler(&sampler_from_leaf)); + } + return Status::OK(); +} + +// Perform leaf node cache tranform identifications +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + if (is_caching_) { + // If we are a TF Reader in a caching tree, then change our config so that it becomes a basic + // TF reader that parses all files. Selection of data will come from the sampler on the cache instead. 
+ node->MakeSimpleProducer(); + } + return NonMappableCacheLeafSetup(std::static_pointer_cast(node)); +} + +// Perform leaf node cache tranform identifications +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + return NonMappableCacheLeafSetup(std::static_pointer_cast(node)); +} + +// Perform leaf node cache tranform identifications +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + return MappableCacheLeafSetup(std::static_pointer_cast(node)); +} + +// Perform leaf node cache tranform identifications +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + return MappableCacheLeafSetup(std::static_pointer_cast(node)); +} + +// Perform leaf node cache tranform identifications +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + return MappableCacheLeafSetup(std::static_pointer_cast(node)); +} + +// Perform leaf node cache tranform identifications +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + return MappableCacheLeafSetup(std::static_pointer_cast(node)); +} + +// Perform leaf node cache tranform identifications +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + return MappableCacheLeafSetup(std::static_pointer_cast(node)); +} + +// Perform leaf node cache tranform identifications +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + return MappableCacheLeafSetup(std::static_pointer_cast(node)); +} + +// Perform leaf node cache tranform identifications +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + return MappableCacheLeafSetup(std::static_pointer_cast(node)); +} + +// Perform leaf node cache tranform identifications +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + return MappableCacheLeafSetup(std::static_pointer_cast(node)); +} + +// Perform leaf node cache tranform identifications +Status CachePass::RunOnNode(std::shared_ptr node, bool *modified) { + return 
MappableCacheLeafSetup(std::static_pointer_cast(node)); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_pass.h b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_pass.h new file mode 100644 index 00000000000..cbc805cd3e0 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_pass.h @@ -0,0 +1,138 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_ENGINE_OPT_PASS_PRE_CACHE_PASS_H_ +#define DATASET_ENGINE_OPT_PASS_PRE_CACHE_PASS_H_ + +#include +#include +#include +#include "minddata/dataset/engine/opt/pass.h" + +namespace mindspore { +namespace dataset { + +class CacheTransformPass; + +/// \class CachePass cache_pass.h +/// \brief This is a NodePass who's job is to identify and set up the nodes that will be involved in a cache +/// transformation. It works in conjunction with the CacheTransformPass +class CachePass : public NodePass { + public: + /// \brief Constructor + /// \param[in] transform_pass Raw pointer back to controlling tree pass + explicit CachePass(CacheTransformPass *transform_pass); + + /// \brief Identifies the subtree below this node as a cached descendant tree. 
+ /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status PreRunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Resets the tracking of the cache within the tree and assigns the operators that will be involved in a cache + /// transformation + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Perform leaf node cache tranform identifications + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Perform leaf node cache tranform identifications + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Perform leaf node cache tranform identifications + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Perform leaf node cache tranform identifications + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Perform leaf node cache tranform identifications + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status 
RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Perform leaf node cache tranform identifications + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Perform leaf node cache tranform identifications + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Perform leaf node cache tranform identifications + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Perform leaf node cache tranform identifications + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Perform leaf node cache tranform identifications + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Perform leaf node cache tranform identifications + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + private: + /// \brief Common code for mappable leaf setup. + /// \param[in] node The leaf node performing setup work. 
+ /// \return Status The error code return + Status MappableCacheLeafSetup(std::shared_ptr leaf_op); + + /// \brief Common code for non-mappable leaf setup. + /// \param[in] node The leaf node performing setup work. + /// \return Status The error code return + Status NonMappableCacheLeafSetup(std::shared_ptr leaf_op); + + bool is_caching_; + std::shared_ptr leaf_op_; + std::shared_ptr sampler_; + CacheTransformPass *transform_pass_; // Back pointer to the owning transform pass +}; + +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_ENGINE_OPT_PASS_PRE_CACHE_PASS_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_transform_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_transform_pass.cc new file mode 100644 index 00000000000..033150e8f41 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_transform_pass.cc @@ -0,0 +1,108 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include "minddata/dataset/engine/opt/pre/cache_pass.h" +#include "minddata/dataset/engine/opt/pre/cache_transform_pass.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/cache/cache_client.h" +#include "minddata/dataset/engine/datasetops/cache_lookup_op.h" +#include "minddata/dataset/engine/datasetops/cache_merge_op.h" +#include "minddata/dataset/engine/datasetops/cache_op.h" + +namespace mindspore { +namespace dataset { + +// constructor +CacheTransformPass::CacheTransformPass() {} + +// Runs a cache_pass first to set up the transformation nodes, and then drives any of these transformations +Status CacheTransformPass::RunOnTree(ExecutionTree *tree, bool *modified) { + MS_LOG(INFO) << "Pre pass: Cache transform pass started."; + // Create the cache pass and run it. The cache pass identifies and creates the leaf/cache pairs that we will + // use to execute a transform. + std::unique_ptr cache_pass = std::make_unique(this); + RETURN_IF_NOT_OK(cache_pass->Run(tree, modified)); + + // Then, execute the transform for each pair + for (auto cache_pair : cache_pairs_) { + MS_LOG(DEBUG) << "Cache transform pass: Executing a cache op mappable transform."; + ExecuteCacheTransform(tree, cache_pair.first, cache_pair.second, cache_pair.second->cache_client()); + } + MS_LOG(INFO) << "Pre pass: Cache transform pass complete."; + return Status::OK(); +} + +// Helper function to execute the cache transformation. +Status CacheTransformPass::ExecuteCacheTransform(ExecutionTree *tree, std::shared_ptr leaf_op, + std::shared_ptr cache_op, + std::shared_ptr cache_client) { + // Get local pointers the child/parent of the cache op. It's possible that the parent is null if the cache was + // the root node. 
It is also possible that cache_child == leaf_op + std::shared_ptr cache_child = cache_op->child(0); + DatasetOp *cache_parent = nullptr; + cache_op->Parent(&cache_parent, 0); // fetch the cache op's parent + + // Extract the sampler from the leaf. We will overwrite this sampler with the lookup op later. + std::shared_ptr leaf_sampler = leaf_op->sampler(); + + // Construct the merge op with defaults + std::shared_ptr merge_op; + CacheMergeOp::Builder merge_builder; + RETURN_IF_NOT_OK(merge_builder.SetClient(cache_client).Build(&merge_op)); + RETURN_IF_NOT_OK(tree->AssociateNode(merge_op)); + + // Construct the cache lookup op with defaults + std::shared_ptr cache_lookup_op; + CacheLookupOp::Builder lookup_builder; + RETURN_IF_NOT_OK(lookup_builder.SetClient(cache_client).SetSampler(std::move(leaf_sampler)).Build(&cache_lookup_op)); + RETURN_IF_NOT_OK(tree->AssociateNode(cache_lookup_op)); + + // Overwrite the old sampler in this leaf op to become the lookup op + leaf_op->SetSampler(cache_lookup_op); + + // If the cache had a parent, then go into that parent to remove the cache from its child list and then + // replace it with the merge op. + if (cache_parent != nullptr) { + RETURN_IF_NOT_OK(cache_parent->RemoveChild(cache_op)); + RETURN_IF_NOT_OK(cache_parent->AddChild(merge_op)); + } else { + // If we didn't have a parent, then the merge op is the root node + RETURN_IF_NOT_OK(tree->AssignRoot(merge_op)); + } + + // Set the cache op to no longer be a parent over its child. This will fully disconnect the old cache op. + // We maintain a local pointer to the old child though. + RETURN_IF_NOT_OK(cache_op->RemoveChild(cache_child)); + + // Connect the merge op + RETURN_IF_NOT_OK(merge_op->AddChild(std::move(cache_lookup_op))); + RETURN_IF_NOT_OK(merge_op->AddChild(std::move(cache_child))); + + // At this point, the cache op has already had its children and parents taken away. 
Calling remove + // on it at this point will not do any node hookups, and instead set internal fields to invalid. + RETURN_IF_NOT_OK(cache_op->Remove()); + + return Status::OK(); +} + +// Assigns the leaf and cache operators that are involved in a cache transformation +void CacheTransformPass::AddMappableCacheOperators(std::shared_ptr leaf_op, + std::shared_ptr cache_op) { + cache_pairs_.push_back(std::make_pair(leaf_op, cache_op)); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_transform_pass.h b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_transform_pass.h new file mode 100644 index 00000000000..02c22c4472c --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/cache_transform_pass.h @@ -0,0 +1,79 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef DATASET_ENGINE_OPT_PASS_PRE_CACHE_TRANSFORM_PASS_H_ +#define DATASET_ENGINE_OPT_PASS_PRE_CACHE_TRANSFORM_PASS_H_ + +#include +#include +#include +#include "minddata/dataset/engine/opt/pass.h" + +namespace mindspore { +namespace dataset { + +class DatasetOp; + +class CacheClient; + +/// \class CacheTransformPass cache_transform_pass.h +/// \brief This is a tree pass that will invoke a tree transformation to inject the correct operators for caching +/// operations +class CacheTransformPass : public TreePass { + public: + /// \brief Constructor + CacheTransformPass(); + + /// \brief Runs a cache_pass first to set up the transformation nodes, and then drives any of these transformations + /// \param[inout] tree The tree to operate on. + /// \param[inout] modified Indicator if the tree was modified. + /// \return Status The error code return + Status RunOnTree(ExecutionTree *tree, bool *modified) override; + + /// \brief Assigns the leaf and cache operators that are involved in a cache transformation + /// \param[in] leaf_op The leaf operator involved in the cache transform + /// \param[in] cache_op The cache operator involved in the cache transform + void AddMappableCacheOperators(std::shared_ptr leaf_op, std::shared_ptr cache_op); + + private: + /// \brief Helper function to execute the cache transformation. 
+ /// + /// Input: + /// Sampler + /// | + /// LeafOp --> OtherOps --> CacheOp + /// + /// Transformed: + /// Sampler --> CacheLookupOp ----------------> + /// | | + /// | MergeOp + /// | | + /// LeafOp --> OtherOps --> + /// + /// \param[in] leaf_op The leaf node in the transform + /// \param[in] cache_op The cache op in the transform (will get removed) + /// \param[in] cache_client The cache client + /// \return Status The error code return + Status ExecuteCacheTransform(ExecutionTree *tree, std::shared_ptr leaf_op, + std::shared_ptr cache_op, std::shared_ptr cache_client); + + // The two operators that work together to establish the cache transform + std::vector, std::shared_ptr>> cache_pairs_; +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_ENGINE_OPT_PASS_PRE_CACHE_TRANSFORM_PASS_H_ diff --git a/mindspore/ccsrc/dataset/engine/opt/pre/removal_nodes.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_nodes.cc similarity index 61% rename from mindspore/ccsrc/dataset/engine/opt/pre/removal_nodes.cc rename to mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_nodes.cc index 831a2a76ba4..f04d7bc07d2 100644 --- a/mindspore/ccsrc/dataset/engine/opt/pre/removal_nodes.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_nodes.cc @@ -15,21 +15,37 @@ */ #include -#include "dataset/engine/opt/pre/removal_nodes.h" -#include "dataset/engine/opt/pre/removal_pass.h" -#include "dataset/engine/datasetops/shuffle_op.h" +#include "minddata/dataset/engine/opt/pre/removal_nodes.h" +#include "minddata/dataset/engine/opt/pre/removal_pass.h" +#include "minddata/dataset/engine/datasetops/shuffle_op.h" namespace mindspore { namespace dataset { RemovalNodes::RemovalNodes(RemovalPass *removal_pass) : removal_pass_(removal_pass), is_caching_(false) {} +// Identifies the subtree below this node as a cached descendant tree. 
+Status RemovalNodes::PreRunOnNode(std::shared_ptr node, bool *modified) { + *modified = false; + MS_LOG(INFO) << "Removal pass: CacheOp found, identified descendant tree."; + is_caching_ = true; + return Status::OK(); +} + +// Resets the tracking of the cache within the tree +Status RemovalNodes::RunOnNode(std::shared_ptr node, bool *modified) { + *modified = false; + MS_LOG(INFO) << "Removal pass: cache descendant tree complete."; + is_caching_ = false; + return Status::OK(); +} + // Perform ShuffleOp removal check. Status RemovalNodes::RunOnNode(std::shared_ptr node, bool *modified) { *modified = false; // If we are in a cache descendant tree, then this shuffle op needs to be removed if (is_caching_) { - MS_LOG(DEBUG) << "ShuffleOp identified for removal (CacheOp is in ascendant tree)"; + MS_LOG(INFO) << "ShuffleOp identified for removal (CacheOp is in ascendant tree)"; if (removal_pass_) { removal_pass_->AddToRemovalList(std::static_pointer_cast(node)); } else { diff --git a/mindspore/ccsrc/dataset/engine/opt/pre/removal_nodes.h b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_nodes.h similarity index 68% rename from mindspore/ccsrc/dataset/engine/opt/pre/removal_nodes.h rename to mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_nodes.h index 11ef37d80c4..32025cd597e 100644 --- a/mindspore/ccsrc/dataset/engine/opt/pre/removal_nodes.h +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_nodes.h @@ -18,13 +18,11 @@ #define DATASET_ENGINE_OPT_PASS_PRE_REMOVAL_NODES_H_ #include -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/engine/opt/pass.h" +#include "minddata/dataset/engine/opt/pre/removal_pass.h" namespace mindspore { namespace dataset { - -class RemovalPass; - /// \class RemovalNodes removal_nodes.h /// \brief This is a NodePass who's job is to identify which nodes should be removed. /// It works in conjunction with the removal_pass. 
@@ -34,6 +32,21 @@ class RemovalNodes : public NodePass { /// \param[in] removal_pass Raw pointer back to controlling tree pass explicit RemovalNodes(RemovalPass *removal_pass); + /// \brief Identifies the subtree below this node as a cached descendant tree. + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status PreRunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Resets the tracking of the cache within the tree + /// \param[in] node The node being visited + /// \param[inout] modified Indicator if the node was changed at all + /// \return Status The error code return + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + /// \brief Destructor + ~RemovalNodes() = default; + /// \brief Perform ShuffleOp removal check /// \param[in] node The node being visited /// \param[inout] modified Indicator if the node was changed at all diff --git a/mindspore/ccsrc/dataset/engine/opt/pre/removal_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_pass.cc similarity index 84% rename from mindspore/ccsrc/dataset/engine/opt/pre/removal_pass.cc rename to mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_pass.cc index 31ec31234fa..0db422a7c25 100644 --- a/mindspore/ccsrc/dataset/engine/opt/pre/removal_pass.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_pass.cc @@ -16,9 +16,9 @@ #include #include -#include "dataset/engine/opt/pre/removal_nodes.h" -#include "dataset/engine/opt/pre/removal_pass.h" -#include "dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/opt/pre/removal_nodes.h" +#include "minddata/dataset/engine/opt/pre/removal_pass.h" +#include "minddata/dataset/engine/execution_tree.h" namespace mindspore { namespace dataset { @@ -28,6 +28,7 @@ RemovalPass::RemovalPass() {} // Runs a removal_nodes pass first to find out which nodes to remove, then removes them. 
Status RemovalPass::RunOnTree(ExecutionTree *tree, bool *modified) { + MS_LOG(INFO) << "Pre pass: removal pass started."; // Create the removal node pass which can identify which nodes need to be removed. std::unique_ptr removal_nodes = std::make_unique(this); RETURN_IF_NOT_OK(removal_nodes->Run(tree, modified)); @@ -36,6 +37,7 @@ Status RemovalPass::RunOnTree(ExecutionTree *tree, bool *modified) { for (auto node : removal_nodes_) { node->Remove(); } + MS_LOG(INFO) << "Pre pass: removal pass complete."; return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/engine/opt/pre/removal_pass.h b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_pass.h similarity index 94% rename from mindspore/ccsrc/dataset/engine/opt/pre/removal_pass.h rename to mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_pass.h index 6523ca69b2b..bcab7cf08cd 100644 --- a/mindspore/ccsrc/dataset/engine/opt/pre/removal_pass.h +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/removal_pass.h @@ -19,7 +19,7 @@ #include #include -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/engine/opt/pass.h" namespace mindspore { namespace dataset { @@ -34,6 +34,9 @@ class RemovalPass : public TreePass { /// \brief Constructor RemovalPass(); + /// \brief Destructor + ~RemovalPass() = default; + /// \brief Runs a removal_nodes pass first to find out which nodes to remove, then removes them. /// \param[inout] tree The tree to operate on. /// \param[inout] Indicate of the tree was modified. 
diff --git a/mindspore/ccsrc/dataset/engine/opt/util/printer_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/util/printer_pass.cc similarity index 97% rename from mindspore/ccsrc/dataset/engine/opt/util/printer_pass.cc rename to mindspore/ccsrc/minddata/dataset/engine/opt/util/printer_pass.cc index 852bc018b20..eb74d8fcc35 100644 --- a/mindspore/ccsrc/dataset/engine/opt/util/printer_pass.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/util/printer_pass.cc @@ -15,7 +15,7 @@ */ #include -#include "dataset/engine/opt/util/printer_pass.h" +#include "minddata/dataset/engine/opt/util/printer_pass.h" namespace mindspore { namespace dataset { @@ -50,12 +50,6 @@ Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { return Status::OK(); } -Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { - *modified = false; - std::cout << "Visiting FilterOp" << '\n'; - return Status::OK(); -} - Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { *modified = false; std::cout << "Visiting SkipOp" << '\n'; @@ -67,11 +61,6 @@ Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { return Status::OK(); } -Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { - *modified = false; - std::cout << "Visiting GeneratorOp" << '\n'; - return Status::OK(); -} Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { *modified = false; std::cout << "Visiting MindRecordOp" << '\n'; @@ -84,6 +73,20 @@ Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) return Status::OK(); } +#ifdef ENABLE_PYTHON +Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { + *modified = false; + std::cout << "Visiting FilterOp" << '\n'; + return Status::OK(); +} + +Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { + *modified = false; + std::cout << "Visiting GeneratorOp" << '\n'; + return Status::OK(); +} +#endif + Status PrinterPass::RunOnNode(std::shared_ptr node, bool 
*modified) { *modified = false; std::cout << "Visiting TakeOp" << '\n'; diff --git a/mindspore/ccsrc/dataset/engine/opt/util/printer_pass.h b/mindspore/ccsrc/minddata/dataset/engine/opt/util/printer_pass.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/opt/util/printer_pass.h rename to mindspore/ccsrc/minddata/dataset/engine/opt/util/printer_pass.h index fa04a88277e..527df3ccc9a 100644 --- a/mindspore/ccsrc/dataset/engine/opt/util/printer_pass.h +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/util/printer_pass.h @@ -18,7 +18,7 @@ #define DATASET_ENGINE_OPT_PASS_UTIL_PRINTER_H #include -#include "dataset/engine/opt/pass.h" +#include "minddata/dataset/engine/opt/pass.h" namespace mindspore { namespace dataset { @@ -35,18 +35,20 @@ class PrinterPass : public NodePass { Status RunOnNode(std::shared_ptr node, bool *modified) override; - Status RunOnNode(std::shared_ptr node, bool *modified) override; - Status RunOnNode(std::shared_ptr node, bool *modified) override; Status RunOnNode(std::shared_ptr node, bool *modified) override; - Status RunOnNode(std::shared_ptr node, bool *modified) override; - Status RunOnNode(std::shared_ptr node, bool *modified) override; Status RunOnNode(std::shared_ptr node, bool *modified) override; +#ifdef ENABLE_PYTHON + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + Status RunOnNode(std::shared_ptr node, bool *modified) override; +#endif + Status RunOnNode(std::shared_ptr node, bool *modified) override; Status RunOnNode(std::shared_ptr node, bool *modified) override; diff --git a/mindspore/ccsrc/dataset/engine/perf/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/perf/CMakeLists.txt similarity index 100% rename from mindspore/ccsrc/dataset/engine/perf/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/engine/perf/CMakeLists.txt diff --git a/mindspore/ccsrc/dataset/engine/perf/connector_size.cc b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_size.cc similarity index 93% rename 
from mindspore/ccsrc/dataset/engine/perf/connector_size.cc rename to mindspore/ccsrc/minddata/dataset/engine/perf/connector_size.cc index 0bd27540752..20b4908030e 100644 --- a/mindspore/ccsrc/dataset/engine/perf/connector_size.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_size.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/perf/connector_size.h" +#include "minddata/dataset/engine/perf/connector_size.h" #include #include #include #include -#include "dataset/core/config_manager.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/util/path.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/util/path.h" using json = nlohmann::json; namespace mindspore { diff --git a/mindspore/ccsrc/dataset/engine/perf/connector_size.h b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_size.h similarity index 95% rename from mindspore/ccsrc/dataset/engine/perf/connector_size.h rename to mindspore/ccsrc/minddata/dataset/engine/perf/connector_size.h index 2584289fb48..61ba06a76f9 100644 --- a/mindspore/ccsrc/dataset/engine/perf/connector_size.h +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_size.h @@ -19,8 +19,8 @@ #include #include #include -#include "dataset/engine/perf/profiling.h" -#include "dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/engine/perf/profiling.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" using json = nlohmann::json; diff --git a/mindspore/ccsrc/dataset/engine/perf/connector_throughput.cc b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_throughput.cc similarity index 95% rename from mindspore/ccsrc/dataset/engine/perf/connector_throughput.cc rename to mindspore/ccsrc/minddata/dataset/engine/perf/connector_throughput.cc index 4fd59de3902..b5e2efaf730 100644 --- 
a/mindspore/ccsrc/dataset/engine/perf/connector_throughput.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_throughput.cc @@ -20,9 +20,9 @@ #include #include #include -#include "dataset/engine/perf/connector_throughput.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/util/path.h" +#include "minddata/dataset/engine/perf/connector_throughput.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/util/path.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/perf/connector_throughput.h b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_throughput.h similarity index 90% rename from mindspore/ccsrc/dataset/engine/perf/connector_throughput.h rename to mindspore/ccsrc/minddata/dataset/engine/perf/connector_throughput.h index e873eb8315a..9cf387230ac 100644 --- a/mindspore/ccsrc/dataset/engine/perf/connector_throughput.h +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_throughput.h @@ -22,16 +22,15 @@ #include #include #include -#include "dataset/engine/perf/profiling.h" -#include "dataset/engine/perf/perf_data.h" -#include "dataset/engine/perf/cyclic_array.h" -#include "dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/engine/perf/profiling.h" +#include "minddata/dataset/engine/perf/perf_data.h" +#include "minddata/dataset/engine/perf/cyclic_array.h" +#include "minddata/dataset/engine/datasetops/dataset_op.h" +#include "minddata/dataset/engine/execution_tree.h" using json = nlohmann::json; namespace mindspore { namespace dataset { -class ExecutionTree; - // Connector throughput samples the output connector size of each op in the pipeline. // For the description of the data structure see perf_buffer.h // It support JSON serialization for external usage. 
@@ -52,6 +51,10 @@ class ConnectorThroughput : public Sampling { timestamps_.AddSample(std::vector(1)); out_buffer_count_table_.AddSample(std::vector(n_nodes_)); } + + /// \brief Destructor + ~ConnectorThroughput() = default; + // Driver function for connector size sampling. // This function samples the connector size of every nodes within the ExecutionTree Status Sample() override; diff --git a/mindspore/ccsrc/dataset/engine/perf/cyclic_array.h b/mindspore/ccsrc/minddata/dataset/engine/perf/cyclic_array.h similarity index 99% rename from mindspore/ccsrc/dataset/engine/perf/cyclic_array.h rename to mindspore/ccsrc/minddata/dataset/engine/perf/cyclic_array.h index fa60b401c53..2dfc3fd99d1 100644 --- a/mindspore/ccsrc/dataset/engine/perf/cyclic_array.h +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/cyclic_array.h @@ -21,7 +21,7 @@ #include #include #include -#include "dataset/core/constants.h" +#include "minddata/dataset/core/constants.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/perf/dataset_iterator_tracing.cc b/mindspore/ccsrc/minddata/dataset/engine/perf/dataset_iterator_tracing.cc similarity index 95% rename from mindspore/ccsrc/dataset/engine/perf/dataset_iterator_tracing.cc rename to mindspore/ccsrc/minddata/dataset/engine/perf/dataset_iterator_tracing.cc index 99b0c2d7e08..4491db144ee 100644 --- a/mindspore/ccsrc/dataset/engine/perf/dataset_iterator_tracing.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/dataset_iterator_tracing.cc @@ -15,8 +15,8 @@ */ #include #include -#include "dataset/engine/perf/dataset_iterator_tracing.h" -#include "dataset/util/path.h" +#include "minddata/dataset/engine/perf/dataset_iterator_tracing.h" +#include "minddata/dataset/util/path.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/perf/dataset_iterator_tracing.h b/mindspore/ccsrc/minddata/dataset/engine/perf/dataset_iterator_tracing.h similarity index 96% rename from 
mindspore/ccsrc/dataset/engine/perf/dataset_iterator_tracing.h rename to mindspore/ccsrc/minddata/dataset/engine/perf/dataset_iterator_tracing.h index 129863c6d1e..e7ba237a0a9 100644 --- a/mindspore/ccsrc/dataset/engine/perf/dataset_iterator_tracing.h +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/dataset_iterator_tracing.h @@ -19,7 +19,7 @@ #include #include -#include "dataset/engine/perf/profiling.h" +#include "minddata/dataset/engine/perf/profiling.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/perf/device_queue_tracing.cc b/mindspore/ccsrc/minddata/dataset/engine/perf/device_queue_tracing.cc similarity index 95% rename from mindspore/ccsrc/dataset/engine/perf/device_queue_tracing.cc rename to mindspore/ccsrc/minddata/dataset/engine/perf/device_queue_tracing.cc index 204a83e3fb6..776b483b79a 100644 --- a/mindspore/ccsrc/dataset/engine/perf/device_queue_tracing.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/device_queue_tracing.cc @@ -16,8 +16,8 @@ #include #include -#include "dataset/engine/perf/device_queue_tracing.h" -#include "dataset/util/path.h" +#include "minddata/dataset/engine/perf/device_queue_tracing.h" +#include "minddata/dataset/util/path.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/perf/device_queue_tracing.h b/mindspore/ccsrc/minddata/dataset/engine/perf/device_queue_tracing.h similarity index 96% rename from mindspore/ccsrc/dataset/engine/perf/device_queue_tracing.h rename to mindspore/ccsrc/minddata/dataset/engine/perf/device_queue_tracing.h index 13ef7121c15..32f9d2d8c2a 100644 --- a/mindspore/ccsrc/dataset/engine/perf/device_queue_tracing.h +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/device_queue_tracing.h @@ -19,7 +19,7 @@ #include #include -#include "dataset/engine/perf/profiling.h" +#include "minddata/dataset/engine/perf/profiling.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/perf/monitor.cc 
b/mindspore/ccsrc/minddata/dataset/engine/perf/monitor.cc similarity index 91% rename from mindspore/ccsrc/dataset/engine/perf/monitor.cc rename to mindspore/ccsrc/minddata/dataset/engine/perf/monitor.cc index 8a0d682b81c..7fa7e6fc78f 100644 --- a/mindspore/ccsrc/dataset/engine/perf/monitor.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/monitor.cc @@ -15,9 +15,9 @@ */ #include -#include "dataset/core/config_manager.h" -#include "dataset/engine/perf/monitor.h" -#include "dataset/engine/execution_tree.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/engine/perf/monitor.h" +#include "minddata/dataset/engine/execution_tree.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/perf/monitor.h b/mindspore/ccsrc/minddata/dataset/engine/perf/monitor.h similarity index 93% rename from mindspore/ccsrc/dataset/engine/perf/monitor.h rename to mindspore/ccsrc/minddata/dataset/engine/perf/monitor.h index 8b4245db8e4..1e669dad719 100644 --- a/mindspore/ccsrc/dataset/engine/perf/monitor.h +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/monitor.h @@ -20,8 +20,8 @@ #include #include #include -#include "dataset/util/status.h" -#include "dataset/engine/perf/profiling.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/engine/perf/profiling.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/perf/perf_data.h b/mindspore/ccsrc/minddata/dataset/engine/perf/perf_data.h similarity index 98% rename from mindspore/ccsrc/dataset/engine/perf/perf_data.h rename to mindspore/ccsrc/minddata/dataset/engine/perf/perf_data.h index a201d705eac..8f215fd8df8 100644 --- a/mindspore/ccsrc/dataset/engine/perf/perf_data.h +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/perf_data.h @@ -18,7 +18,7 @@ #define DATASET_PERF_DATA_H #include -#include "dataset/core/constants.h" +#include "minddata/dataset/core/constants.h" namespace mindspore { namespace dataset { diff --git 
a/mindspore/ccsrc/dataset/engine/perf/profiling.cc b/mindspore/ccsrc/minddata/dataset/engine/perf/profiling.cc similarity index 93% rename from mindspore/ccsrc/dataset/engine/perf/profiling.cc rename to mindspore/ccsrc/minddata/dataset/engine/perf/profiling.cc index 66f27c46ba3..f5c018c03bb 100644 --- a/mindspore/ccsrc/dataset/engine/perf/profiling.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/profiling.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/perf/profiling.h" +#include "minddata/dataset/engine/perf/profiling.h" #include #include #include #include "common/utils.h" -#include "dataset/util/path.h" -#include "dataset/engine/perf/monitor.h" -#include "dataset/engine/perf/device_queue_tracing.h" -#include "dataset/engine/perf/connector_size.h" -#include "dataset/engine/perf/connector_throughput.h" -#include "dataset/engine/perf/dataset_iterator_tracing.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/engine/perf/monitor.h" +#include "minddata/dataset/engine/perf/device_queue_tracing.h" +#include "minddata/dataset/engine/perf/connector_size.h" +#include "minddata/dataset/engine/perf/connector_throughput.h" +#include "minddata/dataset/engine/perf/dataset_iterator_tracing.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/engine/perf/profiling.h b/mindspore/ccsrc/minddata/dataset/engine/perf/profiling.h similarity index 99% rename from mindspore/ccsrc/dataset/engine/perf/profiling.h rename to mindspore/ccsrc/minddata/dataset/engine/perf/profiling.h index e38c2d5e543..24f7f2efe81 100644 --- a/mindspore/ccsrc/dataset/engine/perf/profiling.h +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/profiling.h @@ -21,7 +21,7 @@ #include #include #include -#include "dataset/util/status.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git 
a/mindspore/ccsrc/dataset/engine/tdt/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/tdt/CMakeLists.txt similarity index 100% rename from mindspore/ccsrc/dataset/engine/tdt/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/engine/tdt/CMakeLists.txt diff --git a/mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.cc b/mindspore/ccsrc/minddata/dataset/engine/tdt/tdt_plugin.cc similarity index 97% rename from mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.cc rename to mindspore/ccsrc/minddata/dataset/engine/tdt/tdt_plugin.cc index ca9f2176f56..126291179a8 100644 --- a/mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/tdt/tdt_plugin.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/engine/tdt/tdt_plugin.h" +#include "minddata/dataset/engine/tdt/tdt_plugin.h" #include "common/utils.h" #include "utils/log_adapter.h" -#include "dataset/engine/perf/profiling.h" +#include "minddata/dataset/engine/perf/profiling.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.h b/mindspore/ccsrc/minddata/dataset/engine/tdt/tdt_plugin.h similarity index 91% rename from mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.h rename to mindspore/ccsrc/minddata/dataset/engine/tdt/tdt_plugin.h index 304b205b81a..a7db08b7f58 100644 --- a/mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.h +++ b/mindspore/ccsrc/minddata/dataset/engine/tdt/tdt_plugin.h @@ -24,9 +24,9 @@ #include #include "tdt/tdt_host_interface.h" -#include "dataset/core/data_type.h" -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_row.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_row.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/core/constants.h 
b/mindspore/ccsrc/minddata/dataset/include/dataset/core/constants.h new file mode 120000 index 00000000000..22fe6d07e1e --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/core/constants.h @@ -0,0 +1 @@ +../../../core/constants.h \ No newline at end of file diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/core/data_type.h b/mindspore/ccsrc/minddata/dataset/include/dataset/core/data_type.h new file mode 120000 index 00000000000..37a0e1b686e --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/core/data_type.h @@ -0,0 +1 @@ +../../../core/data_type.h \ No newline at end of file diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/core/tensor_shape.h b/mindspore/ccsrc/minddata/dataset/include/dataset/core/tensor_shape.h new file mode 120000 index 00000000000..1fb7a24d912 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/core/tensor_shape.h @@ -0,0 +1 @@ +../../../core/tensor_shape.h \ No newline at end of file diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/util/status.h b/mindspore/ccsrc/minddata/dataset/include/dataset/util/status.h new file mode 120000 index 00000000000..b06279c05b5 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/util/status.h @@ -0,0 +1 @@ +../../../util/status.h \ No newline at end of file diff --git a/mindspore/ccsrc/minddata/dataset/include/datasets.h b/mindspore/ccsrc/minddata/dataset/include/datasets.h new file mode 100644 index 00000000000..6f38f5ea167 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/datasets.h @@ -0,0 +1,357 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_INCLUDE_DATASETS_H_ +#define DATASET_INCLUDE_DATASETS_H_ + +#include +#include +#include +#include +#include +#include +#include "minddata/dataset/include/tensor.h" +#include "minddata/dataset/include/iterator.h" +#include "minddata/dataset/include/samplers.h" + +namespace mindspore { +namespace dataset { + +// Forward declare +class DatasetOp; +class DataSchema; +class Tensor; +class TensorShape; + +namespace api { + +class TensorOperation; +class SamplerObj; +class ImageFolderDataset; +class MnistDataset; +class BatchDataset; +class RepeatDataset; +class MapDataset; +class ShuffleDataset; +class Cifar10Dataset; +class ProjectDataset; + +/// \brief Function to create an ImageFolderDataset +/// \notes A source dataset that reads images from a tree of directories +/// All images within one folder have the same label +/// The generated dataset has two columns ['image', 'label'] +/// \param[in] dataset_dir Path to the root directory that contains the dataset +/// \param[in] decode A flag to decode in ImageFolder +/// \param[in] sampler Object used to choose samples from the dataset. 
If sampler is `nullptr`, +/// A `RandomSampler` will be used to randomly iterate the entire dataset +/// \param[in] extensions File extensions to be read +/// \param[in] class_indexing a class name to label map +/// \return Shared pointer to the current ImageFolderDataset +std::shared_ptr ImageFolder(std::string dataset_dir, bool decode = false, + std::shared_ptr sampler = nullptr, + std::set extensions = {}, + std::map class_indexing = {}); + +/// \brief Function to create a MnistDataset +/// \notes The generated dataset has two columns ['image', 'label'] +/// \param[in] dataset_dir Path to the root directory that contains the dataset +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, +/// A `RandomSampler` will be used to randomly iterate the entire dataset +/// \return Shared pointer to the current MnistDataset +std::shared_ptr Mnist(std::string dataset_dir, std::shared_ptr sampler = nullptr); + +/// \brief Function to create a Cifar10 Dataset +/// \notes The generated dataset has two columns ['image', 'label'] +/// \param[in] dataset_dir Path to the root directory that contains the dataset +/// \param[in] num_samples The number of images to be included in the dataset +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler` +/// will be used to randomly iterate the entire dataset +/// \return Shared pointer to the current Dataset +std::shared_ptr Cifar10(const std::string &dataset_dir, int32_t num_samples, + std::shared_ptr sampler); + +/// \class Dataset datasets.h +/// \brief A base class to represent a dataset in the data pipeline. 
+class Dataset : public std::enable_shared_from_this { + public: + friend class Iterator; + + /// \brief Constructor + Dataset(); + + /// \brief Destructor + ~Dataset() = default; + + /// \brief Pure virtual function to convert a Dataset class into a runtime dataset object + /// \return shared pointer to the list of newly created DatasetOps + virtual std::shared_ptr>> Build() = 0; + + /// \brief Pure virtual function for derived class to implement parameters validation + /// \return bool True if all the params are valid + virtual bool ValidateParams() = 0; + + /// \brief Setter function for runtime number of workers + /// \param[in] num_workers The number of threads in this operator + /// \return Shared pointer to the original object + std::shared_ptr SetNumWorkers(int32_t num_workers) { + num_workers_ = num_workers; + return shared_from_this(); + } + + /// \brief Function to create an Iterator over the Dataset pipeline + /// \return Shared pointer to the Iterator + std::shared_ptr CreateIterator(); + + /// \brief Function to create a BatchDataset + /// \notes Combines batch_size number of consecutive rows into batches + /// \param[in] batch_size Path to the root directory that contains the dataset + /// \param[in] drop_remainder Determines whether or not to drop the last possibly incomplete + /// batch. If true, and if there are less than batch_size rows + /// available to make the last batch, then those rows will + /// be dropped and not propagated to the next node + /// \return Shared pointer to the current BatchDataset + std::shared_ptr Batch(int32_t batch_size, bool drop_remainder = false); + + /// \brief Function to create a RepeatDataset + /// \notes Repeats this dataset count times. 
Repeat indefinitely if count is -1 + /// \param[in] count Number of times the dataset should be repeated + /// \return Shared pointer to the current Dataset + /// \note Repeat will return shared pointer to `Dataset` instead of `RepeatDataset` + /// due to a limitation in the current implementation + std::shared_ptr Repeat(int32_t count = -1); + + /// \brief Function to create a MapDataset + /// \notes Applies each operation in operations to this dataset + /// \param[in] operations Vector of operations to be applied on the dataset. Operations are + /// applied in the order they appear in this list + /// \param[in] input_columns Vector of the names of the columns that will be passed to the first + /// operation as input. The size of this list must match the number of + /// input columns expected by the first operator. The default input_columns + /// is the first column + /// \param[in] output_columns Vector of names assigned to the columns outputted by the last operation + /// This parameter is mandatory if len(input_columns) != len(output_columns) + /// The size of this list must match the number of output columns of the + /// last operation. 
The default output_columns will have the same + /// name as the input columns, i.e., the columns will be replaced + /// \param[in] project_columns A list of column names to project + /// \return Shared pointer to the current MapDataset + std::shared_ptr Map(std::vector> operations, + std::vector input_columns = {}, + std::vector output_columns = {}, + const std::vector &project_columns = {}); + + /// \brief Function to create a Shuffle Dataset + /// \notes Randomly shuffles the rows of this dataset + /// \param[in] buffer_size The size of the buffer (must be larger than 1) for shuffling + /// \return Shared pointer to the current ShuffleDataset + std::shared_ptr Shuffle(int32_t shuffle_size); + + /// \brief Function to create a Project Dataset + /// \notes Applies project to the dataset + /// \param[in] columns The name of columns to project + /// \return Shared pointer to the current Dataset + std::shared_ptr Project(const std::vector &columns); + + protected: + std::vector> children; + std::shared_ptr parent; + + int32_t num_workers_; + int32_t rows_per_buffer_; + int32_t connector_que_size_; +}; + +/* ####################################### Derived Dataset classes ################################# */ + +/// \class ImageFolderDataset +/// \brief A Dataset derived class to represent ImageFolder dataset +class ImageFolderDataset : public Dataset { + public: + /// \brief Constructor + ImageFolderDataset(std::string dataset_dir, bool decode, std::shared_ptr sampler, bool recursive, + std::set extensions, std::map class_indexing); + + /// \brief Destructor + ~ImageFolderDataset() = default; + + /// \brief a base class override function to create the required runtime dataset op objects for this class + /// \return shared pointer to the list of newly created DatasetOps + std::shared_ptr>> Build() override; + + /// \brief Parameters validation + /// \return bool true if all the params are valid + bool ValidateParams() override; + + private: + std::string dataset_dir_; + 
bool decode_; + bool recursive_; + std::shared_ptr sampler_; + std::map class_indexing_; + std::set exts_; +}; + +class MnistDataset : public Dataset { + public: + /// \brief Constructor + MnistDataset(std::string dataset_dir, std::shared_ptr sampler); + + /// \brief Destructor + ~MnistDataset() = default; + + /// \brief a base class override function to create the required runtime dataset op objects for this class + /// \return shared pointer to the list of newly created DatasetOps + std::shared_ptr>> Build() override; + + /// \brief Parameters validation + /// \return bool true if all the params are valid + bool ValidateParams() override; + + private: + std::string dataset_dir_; + std::shared_ptr sampler_; +}; + +class BatchDataset : public Dataset { + public: + /// \brief Constructor + BatchDataset(int32_t batch_size, bool drop_remainder, bool pad, std::vector cols_to_map, + std::map>> pad_map); + + /// \brief Destructor + ~BatchDataset() = default; + + /// \brief a base class override function to create the required runtime dataset op objects for this class + /// \return shared pointer to the list of newly created DatasetOps + std::shared_ptr>> Build() override; + + /// \brief Parameters validation + /// \return bool true if all the params are valid + bool ValidateParams() override; + + private: + int32_t batch_size_; + bool drop_remainder_; + bool pad_; + std::vector cols_to_map_; + std::map>> pad_map_; +}; + +class RepeatDataset : public Dataset { + public: + /// \brief Constructor + explicit RepeatDataset(uint32_t count); + + /// \brief Destructor + ~RepeatDataset() = default; + + /// \brief a base class override function to create the required runtime dataset op objects for this class + /// \return shared pointer to the list of newly created DatasetOps + std::shared_ptr>> Build() override; + + /// \brief Parameters validation + /// \return bool true if all the params are valid + bool ValidateParams() override; + + private: + uint32_t repeat_count_; +}; + 
+class ShuffleDataset : public Dataset { + public: + ShuffleDataset(int32_t shuffle_size, bool reset_every_epoch); + + ~ShuffleDataset() = default; + + std::shared_ptr>> Build() override; + + bool ValidateParams() override; + + private: + int32_t shuffle_size_; + uint32_t shuffle_seed_; + bool reset_every_epoch_; +}; + +class MapDataset : public Dataset { + public: + /// \brief Constructor + MapDataset(std::vector> operations, std::vector input_columns = {}, + std::vector output_columns = {}, const std::vector &columns = {}); + + /// \brief Destructor + ~MapDataset() = default; + + /// \brief a base class override function to create the required runtime dataset op objects for this class + /// \return shared pointer to the list of newly created DatasetOps + std::shared_ptr>> Build() override; + + /// \brief Parameters validation + /// \return bool true if all the params are valid + bool ValidateParams() override; + + private: + std::vector> operations_; + std::vector input_columns_; + std::vector output_columns_; + std::vector project_columns_; +}; + +class Cifar10Dataset : public Dataset { + public: + /// \brief Constructor + Cifar10Dataset(const std::string &dataset_dir, int32_t num_samples, std::shared_ptr sampler); + + /// \brief Destructor + ~Cifar10Dataset() = default; + + /// \brief a base class override function to create the required runtime dataset op objects for this class + /// \return shared pointer to the list of newly created DatasetOps + std::shared_ptr>> Build() override; + + /// \brief Parameters validation + /// \return bool true if all the params are valid + bool ValidateParams() override; + + private: + std::string dataset_dir_; + int32_t num_samples_; + std::shared_ptr sampler_; +}; + +class ProjectDataset : public Dataset { + public: + /// \brief Constructor + explicit ProjectDataset(const std::vector &columns); + + /// \brief Destructor + ~ProjectDataset() = default; + + /// \brief a base class override function to create the required runtime 
dataset op objects for this class + /// \return shared pointer to the list of newly created DatasetOps + std::shared_ptr>> Build() override; + + /// \brief Parameters validation + /// \return bool true if all the params are valid + bool ValidateParams() override; + + private: + std::vector columns_; +}; +} // namespace api +} // namespace dataset +} // namespace mindspore +#endif // DATASET_INCLUDE_DATASETS_H_ diff --git a/mindspore/ccsrc/minddata/dataset/include/iterator.h b/mindspore/ccsrc/minddata/dataset/include/iterator.h new file mode 100644 index 00000000000..c3784821a6e --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/iterator.h @@ -0,0 +1,115 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_INCLUDE_ITERATOR_H_ +#define DATASET_INCLUDE_ITERATOR_H_ + +#include +#include +#include +#include +#include "minddata/dataset/include/status.h" + +namespace mindspore { +namespace dataset { + +// Forward declare +class ExecutionTree; +class DatasetIterator; +class DatasetOp; +class Tensor; + +namespace api { + +class Dataset; + +using TensorMap = std::unordered_map>; + +// Abstract class for iterating over the dataset. +class Iterator { + public: + /// \brief Constructor + Iterator() = default; + + /// \brief Destructor + ~Iterator() = default; + + /// \brief Method for building and launching the pipeline. + /// \param[in] ops - a vector of DatasetOp in the data pipeline. 
+ /// \return - a Status error code, returns OK if no error encountered. + Status BuildAndLaunchTree(std::shared_ptr ds); + + /// \brief Function to get the next row from the data pipeline. + /// \param[out] row - the output tensor row. + void GetNextRow(TensorMap *row); + + /// \brief Function to shut down the data pipeline. + void Stop(); + + class _Iterator { + public: + explicit _Iterator(Iterator *lt) : lt_{lt}, cur_row_{nullptr} { + if (lt_) { + cur_row_ = new TensorMap(); + lt_->GetNextRow(cur_row_); + } + } + + // Destructor + ~_Iterator() { + if (cur_row_) { + delete cur_row_; + } + } + + _Iterator &operator++() { + if (lt_) { + ++ind_; + lt_->GetNextRow(cur_row_); + } + if (cur_row_ && cur_row_->size() == 0) { + delete cur_row_; + cur_row_ = nullptr; + } + return *this; + } // prefix ++ overload + TensorMap &operator*() { return *cur_row_; } // dereference operator + TensorMap *operator->() { return cur_row_; } + + bool operator!=(const _Iterator &rhs) { return cur_row_ != rhs.cur_row_; } + + private: + int ind_; // the cur node our Iterator points to + Iterator *lt_; + TensorMap *cur_row_; + }; + + _Iterator begin() { return _Iterator(this); } + + _Iterator end() { return _Iterator(nullptr); } + + private: + // Runtime tree. + // Use shared_ptr instead of unique_ptr because the DatasetIterator constructor takes in a shared_ptr type. 
+ std::shared_ptr tree_; + + // Runtime iterator + std::unique_ptr iterator_; +}; +} // namespace api +} // namespace dataset +} // namespace mindspore +#endif // DATASET_INCLUDE_ITERATOR_H_ diff --git a/mindspore/ccsrc/minddata/dataset/include/samplers.h b/mindspore/ccsrc/minddata/dataset/include/samplers.h new file mode 100644 index 00000000000..3d57e67059c --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/samplers.h @@ -0,0 +1,199 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_API_SAMPLERS_H_ +#define DATASET_API_SAMPLERS_H_ + +#include +#include + +namespace mindspore { +namespace dataset { + +// Internal Sampler class forward declaration +class Sampler; + +namespace api { + +class SamplerObj : public std::enable_shared_from_this { + public: + SamplerObj(); + + ~SamplerObj() = default; + + virtual std::shared_ptr Build() = 0; + virtual bool ValidateParams() = 0; +}; + +class DistributedSamplerObj; +class PKSamplerObj; +class RandomSamplerObj; +class SequentialSamplerObj; +class SubsetRandomSamplerObj; +class WeightedRandomSamplerObj; + +/// Function to create a Distributed Sampler. +/// \notes A Sampler that access a shard of the dataset. +/// \param[in] num_shards - Number of shards to divide the dataset into. +/// \param[in] shard_id - Shard ID of the current shard within num_shards. +/// \param[in] shuffle - If true, the indices are shuffled. 
+/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \param[in] seed - The seed in use when shuffle is true. +/// \return Shared pointer to the current Sampler. +std::shared_ptr DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle = true, + int64_t num_samples = 0, uint32_t seed = 1); + +/// Function to create a PK Sampler. +/// \notes Samples K elements for each P class in the dataset. +/// This will sample all classes. +/// \param[in] num_val - Number of elements to sample for each class. +/// \param[in] shuffle - If true, the class IDs are shuffled. +/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \return Shared pointer to the current Sampler. +std::shared_ptr PKSampler(int64_t num_val, bool shuffle = false, int64_t num_samples = 0); + +/// Function to create a Random Sampler. +/// \notes Samples the elements randomly. +/// \param[in] replacement - If True, put the sample ID back for the next draw. +/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \return Shared pointer to the current Sampler. +std::shared_ptr RandomSampler(bool replacement = false, int64_t num_samples = 0); + +/// Function to create a Sequential Sampler. +/// \notes Samples the dataset elements sequentially, same as not having a sampler. +/// \param[in] start_index - Index to start sampling at (dafault to start at first id). +/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \return Shared pointer to the current Sampler. +std::shared_ptr SequentialSampler(int64_t start_index = 0, int64_t num_samples = 0); + +/// Function to create a Subset Random Sampler. +/// \notes Samples the elements randomly from a sequence of indices. +/// \param[in] indices - A vector sequence of indices. +/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \return Shared pointer to the current Sampler. 
+std::shared_ptr SubsetRandomSampler(const std::vector &indices, + int64_t num_samples = 0); + +/// Function to create a Weighted Random Sampler. +/// \notes Samples the elements from [0, len(weights) - 1] randomly with the given +/// weights (probabilities). +/// \param[in] weights - A vector sequence of weights, not necessarily summing up to 1. +/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \param[in] replacement - If True, put the sample ID back for the next draw. +/// \return Shared pointer to the current Sampler. +std::shared_ptr WeightedRandomSampler(const std::vector &weights, + int64_t num_samples = 0, bool replacement = true); + +/* ####################################### Derived Sampler classes ################################# */ +class DistributedSamplerObj : public SamplerObj { + public: + DistributedSamplerObj(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples, uint32_t seed); + + ~DistributedSamplerObj() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + int64_t num_shards_; + int64_t shard_id_; + bool shuffle_; + int64_t num_samples_; + uint32_t seed_; +}; + +class PKSamplerObj : public SamplerObj { + public: + PKSamplerObj(int64_t num_val, bool shuffle, int64_t num_samples); + + ~PKSamplerObj() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + int64_t num_val_; + bool shuffle_; + int64_t num_samples_; +}; + +class RandomSamplerObj : public SamplerObj { + public: + RandomSamplerObj(bool replacement, int64_t num_samples); + + ~RandomSamplerObj() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + bool replacement_; + int64_t num_samples_; +}; + +class SequentialSamplerObj : public SamplerObj { + public: + SequentialSamplerObj(int64_t start_index, int64_t num_samples); + + ~SequentialSamplerObj() = default; + + std::shared_ptr Build() 
override; + + bool ValidateParams() override; + + private: + int64_t start_index_; + int64_t num_samples_; +}; + +class SubsetRandomSamplerObj : public SamplerObj { + public: + SubsetRandomSamplerObj(const std::vector &indices, int64_t num_samples); + + ~SubsetRandomSamplerObj() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + const std::vector &indices_; + int64_t num_samples_; +}; + +class WeightedRandomSamplerObj : public SamplerObj { + public: + explicit WeightedRandomSamplerObj(const std::vector &weights, int64_t num_samples = 0, + bool replacement = true); + + ~WeightedRandomSamplerObj() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + const std::vector &weights_; + int64_t num_samples_; + bool replacement_; +}; +} // namespace api +} // namespace dataset +} // namespace mindspore +#endif // DATASET_API_SAMPLERS_H_ diff --git a/mindspore/ccsrc/minddata/dataset/include/status.h b/mindspore/ccsrc/minddata/dataset/include/status.h new file mode 120000 index 00000000000..bba92b63ad9 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/status.h @@ -0,0 +1 @@ +../util/status.h \ No newline at end of file diff --git a/mindspore/ccsrc/minddata/dataset/include/tensor.h b/mindspore/ccsrc/minddata/dataset/include/tensor.h new file mode 120000 index 00000000000..34b5e020a9f --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/tensor.h @@ -0,0 +1 @@ +../core/tensor.h \ No newline at end of file diff --git a/mindspore/ccsrc/minddata/dataset/include/transforms.h b/mindspore/ccsrc/minddata/dataset/include/transforms.h new file mode 100644 index 00000000000..31531a20af0 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/transforms.h @@ -0,0 +1,380 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_API_TRANSFORMS_H_ +#define DATASET_API_TRANSFORMS_H_ + +#include +#include +#include "minddata/dataset/core/constants.h" + +namespace mindspore { +namespace dataset { + +class TensorOp; + +namespace api { +// Abstract class to represent a dataset in the data pipeline. +class TensorOperation : public std::enable_shared_from_this { + public: + /// \brief Constructor + TensorOperation(); + + /// \brief Destructor + ~TensorOperation() = default; + + /// \brief Pure virtual function to convert a TensorOperation class into a runtime TensorOp object. + /// \return shared pointer to the newly created TensorOp. + virtual std::shared_ptr Build() = 0; + + virtual bool ValidateParams() = 0; +}; + +// Transform operations for performing computer vision. +namespace vision { + +class NormalizeOperation; +class DecodeOperation; +class ResizeOperation; +class RandomCropOperation; +class CenterCropOperation; +class UniformAugOperation; +class RandomHorizontalFlipOperation; +class RandomVerticalFlipOperation; +class RandomRotationOperation; +class PadOperation; +class CutOutOperation; +class RandomColorAdjustOperation; + +/// \brief Function to create a Normalize TensorOperation. +/// \notes Normalize the input image with respect to mean and standard deviation. +/// \param[in] mean - a vector of mean values for each channel, w.r.t channel order. +/// \param[in] std - a vector of standard deviations for each channel, w.r.t. channel order. +/// \return Shared pointer to the current TensorOperation. 
+std::shared_ptr Normalize(std::vector mean, std::vector std); + +/// \brief Function to create a Decode TensorOperation. +/// \notes Decode the input image in RGB mode. +/// \param[in] rgb - a boolean of whether to decode in RGB mode or not. +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr Decode(bool rgb = true); + +/// \brief Function to create a Resize TensorOperation. +/// \notes Resize the input image to the given size.. +/// \param[in] size - a vector representing the output size of the resized image. +/// If size is a single value, the image will be resized to this value with +/// the same image aspect ratio. If size has 2 values, it should be (height, width). +/// \param[in] interpolation An enum for the mode of interpolation +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr Resize(std::vector size, + InterpolationMode interpolation = InterpolationMode::kLinear); + +/// \brief Function to create a RandomCrop TensorOperation. +/// \notes Crop the input image at a random location. +/// \param[in] size - a vector representing the output size of the cropped image. +/// If size is a single value, a square crop of size (size, size) is returned. +/// If size has 2 values, it should be (height, width). +/// \param[in] padding - a vector with the value of pixels to pad the image. If 4 values are provided, +/// it pads the left, top, right and bottom respectively. +/// \param[in] pad_if_needed - a boolean whether to pad the image if either side is smaller than +/// the given output size. +/// \param[in] fill_value - a vector representing the pixel intensity of the borders, it is used to +/// fill R, G, B channels respectively. +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr RandomCrop(std::vector size, std::vector padding = {0, 0, 0, 0}, + bool pad_if_needed = false, + std::vector fill_value = {0, 0, 0}); + +/// \brief Function to create a CenterCrop TensorOperation. 
+/// \notes Crops the input image at the center to the given size. +/// \param[in] size - a vector representing the output size of the cropped image. +/// If size is a single value, a square crop of size (size, size) is returned. +/// If size has 2 values, it should be (height, width). +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr CenterCrop(std::vector size); + +/// \brief Function to create a UniformAugment TensorOperation. +/// \notes Tensor operation to perform randomly selected augmentation. +/// \param[in] operations - a vector of TensorOperation operations. +/// \param[in] num_ops - integer representing the number of OPs to be selected and applied. +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr UniformAugment(std::vector> operations, + int32_t num_ops = 2); + +/// \brief Function to create a RandomHorizontalFlip TensorOperation. +/// \notes Tensor operation to perform random horizontal flip. +/// \param[in] prob - float representing the probability of flip. +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr RandomHorizontalFlip(float prob = 0.5); + +/// \brief Function to create a RandomVerticalFlip TensorOperation. +/// \notes Tensor operation to perform random vertical flip. +/// \param[in] prob - float representing the probability of flip. +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr RandomVerticalFlip(float prob = 0.5); + +/// \brief Function to create a RandomRotation TensorOp +/// \notes Rotates the image according to parameters +/// \param[in] degrees A float vector size 2, representing the starting and ending degree +/// \param[in] resample An enum for the mode of interpolation +/// \param[in] expand A boolean representing whether the image is expanded after rotation +/// \param[in] center A float vector size 2, representing the x and y center of rotation. 
+/// \param[in] fill_value A uint8_t vector size 3, representing the rgb value of the fill color +/// \return Shared pointer to the current TensorOp +std::shared_ptr RandomRotation( + std::vector degrees, InterpolationMode resample = InterpolationMode::kNearestNeighbour, bool expand = false, + std::vector center = {-1, -1}, std::vector fill_value = {0, 0, 0}); + +/// \brief Function to create a Pad TensorOp +/// \notes Pads the image according to padding parameters +/// \param[in] padding A vector representing the number of pixels to pad the image +/// If vector has one value, it pads all sides of the image with that value +/// If vector has two values, it pads left and right with the first and +/// top and bottom with the second value +/// If vector has four values, it pads left, top, right, and bottom with +/// those values respectively +/// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is +/// BorderType.kConstant. If 3 values are provided, +/// it is used to fill R, G, B channels respectively +/// \param[in] padding_mode The method of padding (default=BorderType.kConstant) +/// Can be any of +/// [BorderType.kConstant, BorderType.kEdge, BorderType.kReflect, BorderType.kSymmetric] +/// - BorderType.kConstant, means it fills the border with constant values +/// - BorderType.kEdge, means it pads with the last value on the edge +/// - BorderType.kReflect, means it reflects the values on the edge omitting the last value of edge +/// - BorderType.kSymmetric, means it reflects the values on the edge repeating the last value of edge +/// \return Shared pointer to the current TensorOp +std::shared_ptr Pad(std::vector padding, std::vector fill_value = {0}, + BorderType padding_mode = BorderType::kConstant); + +/// \brief Function to create a CutOut TensorOp +/// \notes Randomly cut (mask) out a given number of square patches from the input image +/// \param[in] length Integer representing the side length of each square 
patch +/// \param[in] num_patches Integer representing the number of patches to be cut out of an image +/// \return Shared pointer to the current TensorOp +std::shared_ptr CutOut(int32_t length, int32_t num_patches = 1); + +/// \brief Randomly adjust the brightness, contrast, saturation, and hue of the input image +/// \param[in] brightness Brightness adjustment factor. Must be a vector of one or two values +/// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1} +/// \param[in] contrast Contrast adjustment factor. Must be a vector of one or two values +/// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1} +/// \param[in] saturation Saturation adjustment factor. Must be a vector of one or two values +/// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1} +/// \param[in] hue Brightness adjustment factor. Must be a vector of one or two values +/// if it's a vector of two values it must be in the form of [min, max] where -0.5 <= min <= max <= 0.5 +/// Default value is {0, 0} +/// \return Shared pointer to the current TensorOp +std::shared_ptr RandomColorAdjust(std::vector brightness = {1.0, 1.0}, + std::vector contrast = {1.0, 1.0}, + std::vector saturation = {1.0, 1.0}, + std::vector hue = {0.0, 0.0}); + +/* ####################################### Derived TensorOperation classes ################################# */ + +class NormalizeOperation : public TensorOperation { + public: + NormalizeOperation(std::vector mean, std::vector std); + + ~NormalizeOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector mean_; + std::vector std_; +}; + +class DecodeOperation : public TensorOperation { + public: + explicit DecodeOperation(bool rgb = true); + + ~DecodeOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + bool 
rgb_; +}; + +class ResizeOperation : public TensorOperation { + public: + explicit ResizeOperation(std::vector size, + InterpolationMode interpolation_mode = InterpolationMode::kLinear); + + ~ResizeOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector size_; + InterpolationMode interpolation_; +}; + +class RandomCropOperation : public TensorOperation { + public: + RandomCropOperation(std::vector size, std::vector padding = {0, 0, 0, 0}, + bool pad_if_needed = false, std::vector fill_value = {0, 0, 0}); + + ~RandomCropOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector size_; + std::vector padding_; + bool pad_if_needed_; + std::vector fill_value_; +}; + +class CenterCropOperation : public TensorOperation { + public: + explicit CenterCropOperation(std::vector size); + + ~CenterCropOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector size_; +}; + +class UniformAugOperation : public TensorOperation { + public: + explicit UniformAugOperation(std::vector> operations, int32_t num_ops = 2); + + ~UniformAugOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector> operations_; + int32_t num_ops_; +}; + +class RandomHorizontalFlipOperation : public TensorOperation { + public: + explicit RandomHorizontalFlipOperation(float probability = 0.5); + + ~RandomHorizontalFlipOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + float probability_; +}; + +class RandomVerticalFlipOperation : public TensorOperation { + public: + explicit RandomVerticalFlipOperation(float probability = 0.5); + + ~RandomVerticalFlipOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + float probability_; +}; + +class 
RandomRotationOperation : public TensorOperation { + public: + RandomRotationOperation(std::vector degrees, InterpolationMode interpolation_mode, bool expand, + std::vector center, std::vector fill_value); + + ~RandomRotationOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector degrees_; + InterpolationMode interpolation_mode_; + std::vector center_; + bool expand_; + std::vector fill_value_; +}; + +class PadOperation : public TensorOperation { + public: + PadOperation(std::vector padding, std::vector fill_value = {0}, + BorderType padding_mode = BorderType::kConstant); + + ~PadOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector padding_; + std::vector fill_value_; + BorderType padding_mode_; +}; + +class CutOutOperation : public TensorOperation { + public: + explicit CutOutOperation(int32_t length, int32_t num_patches = 1); + + ~CutOutOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + int32_t length_; + int32_t num_patches_; +}; + +class RandomColorAdjustOperation : public TensorOperation { + public: + RandomColorAdjustOperation(std::vector brightness = {1.0, 1.0}, std::vector contrast = {1.0, 1.0}, + std::vector saturation = {1.0, 1.0}, std::vector hue = {0.0, 0.0}); + + ~RandomColorAdjustOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector brightness_; + std::vector contrast_; + std::vector saturation_; + std::vector hue_; +}; +} // namespace vision +} // namespace api +} // namespace dataset +} // namespace mindspore +#endif // DATASET_API_TRANSFORMS_H_ diff --git a/mindspore/ccsrc/minddata/dataset/include/utils/log_adapter.h b/mindspore/ccsrc/minddata/dataset/include/utils/log_adapter.h new file mode 120000 index 00000000000..f2c939bc0be --- /dev/null +++ 
b/mindspore/ccsrc/minddata/dataset/include/utils/log_adapter.h @@ -0,0 +1 @@ +../../../../utils/log_adapter.h \ No newline at end of file diff --git a/mindspore/ccsrc/minddata/dataset/include/utils/overload.h b/mindspore/ccsrc/minddata/dataset/include/utils/overload.h new file mode 120000 index 00000000000..7dc313d512a --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/utils/overload.h @@ -0,0 +1 @@ +../../../../utils/overload.h \ No newline at end of file diff --git a/mindspore/ccsrc/dataset/kernels/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/kernels/CMakeLists.txt similarity index 50% rename from mindspore/ccsrc/dataset/kernels/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/kernels/CMakeLists.txt index 2ebdd15e3c9..8a9096ff23d 100644 --- a/mindspore/ccsrc/dataset/kernels/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/kernels/CMakeLists.txt @@ -2,7 +2,13 @@ add_subdirectory(image) add_subdirectory(data) file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) -add_library(kernels OBJECT - py_func_op.cc - tensor_op.cc) -target_include_directories(kernels PRIVATE ${pybind11_INCLUDE_DIRS}) +if (ENABLE_PYTHON) + add_library(kernels OBJECT + py_func_op.cc + tensor_op.cc) + target_include_directories(kernels PRIVATE ${pybind11_INCLUDE_DIRS}) +else() + add_library(kernels OBJECT + tensor_op.cc) +endif() + diff --git a/mindspore/ccsrc/dataset/kernels/data/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/kernels/data/CMakeLists.txt similarity index 100% rename from mindspore/ccsrc/dataset/kernels/data/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/kernels/data/CMakeLists.txt diff --git a/mindspore/ccsrc/dataset/kernels/data/concatenate_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/data/concatenate_op.cc similarity index 90% rename from 
mindspore/ccsrc/dataset/kernels/data/concatenate_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/data/concatenate_op.cc index 87115fd3ced..0c91b38b2df 100644 --- a/mindspore/ccsrc/dataset/kernels/data/concatenate_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/concatenate_op.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/data/concatenate_op.h" +#include "minddata/dataset/kernels/data/concatenate_op.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/data/data_utils.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/data/data_utils.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/data/concatenate_op.h b/mindspore/ccsrc/minddata/dataset/kernels/data/concatenate_op.h similarity index 93% rename from mindspore/ccsrc/dataset/kernels/data/concatenate_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/data/concatenate_op.h index 4e4c7ad4e08..46cc6130495 100644 --- a/mindspore/ccsrc/dataset/kernels/data/concatenate_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/concatenate_op.h @@ -21,8 +21,8 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { @@ -55,6 +55,8 @@ class ConcatenateOp : public TensorOp { /// Number of inputs the tensor operation accepts uint32_t NumInput() override { return 0; } + std::string Name() const override { return kConcatenateOp; } + private: int8_t axis_; std::shared_ptr prepend_; diff --git a/mindspore/ccsrc/dataset/kernels/data/data_utils.cc b/mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.cc similarity index 95% rename from 
mindspore/ccsrc/dataset/kernels/data/data_utils.cc rename to mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.cc index 40eba1edf6b..b1d51a6c081 100644 --- a/mindspore/ccsrc/dataset/kernels/data/data_utils.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.cc @@ -14,20 +14,22 @@ * limitations under the License. */ -#include "dataset/kernels/data/data_utils.h" +#include "minddata/dataset/kernels/data/data_utils.h" #include #include #include #include -#include "dataset/core/constants.h" -#include "dataset/core/data_type.h" -#include "dataset/core/pybind_support.h" -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/kernels/data/type_cast_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/data_type.h" +#ifdef ENABLE_PYTHON +#include "minddata/dataset/core/pybind_support.h" +#endif +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/kernels/data/type_cast_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -113,22 +115,27 @@ Status OneHotEncoding(std::shared_ptr input, std::shared_ptr *ou } Status Fill(const std::shared_ptr input, std::shared_ptr *output, std::shared_ptr fill_value) { - CHECK_FAIL_RETURN_UNEXPECTED(!((fill_value->type() == DataType::DE_STRING) && (input->type() != DataType::DE_STRING)), + const DataType &fill_type = fill_value->type(); + const DataType &input_type = input->type(); + const TensorShape &input_shape = input->shape(); + + CHECK_FAIL_RETURN_UNEXPECTED(!((fill_type == DataType::DE_STRING) && (input_type != DataType::DE_STRING)), "Types do not match"); CHECK_FAIL_RETURN_UNEXPECTED(fill_value->shape() == TensorShape({}), "fill_value is not a scalar"); - std::shared_ptr out; + std::shared_ptr out, fill_output; - const DataType &to = input->type(); - std::unique_ptr op(new TypeCastOp(to)); + if (input_type != 
DataType::DE_STRING && fill_type != DataType::DE_STRING && input_type != fill_type) { + auto op = std::make_unique(input_type); + RETURN_IF_NOT_OK(op->Compute(fill_value, &fill_output)); + } else { + fill_output = fill_value; + } - std::shared_ptr fill_output; - RETURN_IF_NOT_OK(op->Compute(fill_value, &fill_output)); + RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, TensorImpl::kFlexible, input_shape, input_type)); - RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, TensorImpl::kFlexible, input->shape(), input->type())); - - switch (input->type().value()) { + switch (input_type.value()) { case DataType::DE_BOOL: { bool value = 0; RETURN_IF_NOT_OK(fill_output->GetItemAt(&value, {})); @@ -206,10 +213,10 @@ Status Fill(const std::shared_ptr input, std::shared_ptr *output std::string_view fill_string_view; RETURN_IF_NOT_OK(fill_value->GetItemAt(&fill_string_view, {})); std::string fill_string = std::string(fill_string_view); - for (int i = 0; i < input->shape().NumOfElements(); i++) { + for (int i = 0; i < input_shape.NumOfElements(); i++) { strings.emplace_back(fill_string); } - RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, strings, input->shape())); + RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, strings, input_shape)); break; } case DataType::DE_UNKNOWN: { diff --git a/mindspore/ccsrc/dataset/kernels/data/data_utils.h b/mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.h similarity index 97% rename from mindspore/ccsrc/dataset/kernels/data/data_utils.h rename to mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.h index 6034e2a0eb6..141545a583a 100644 --- a/mindspore/ccsrc/dataset/kernels/data/data_utils.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.h @@ -19,11 +19,11 @@ #include #include #include -#include "dataset/core/constants.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/core/data_type.h" -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_row.h" +#include "minddata/dataset/core/constants.h" +#include 
"minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_row.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/data/duplicate_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/data/duplicate_op.cc similarity index 87% rename from mindspore/ccsrc/dataset/kernels/data/duplicate_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/data/duplicate_op.cc index 959516a4aa7..57a424704f0 100644 --- a/mindspore/ccsrc/dataset/kernels/data/duplicate_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/duplicate_op.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "dataset/kernels/data/duplicate_op.h" +#include "minddata/dataset/kernels/data/duplicate_op.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/data/duplicate_op.h b/mindspore/ccsrc/minddata/dataset/kernels/data/duplicate_op.h similarity index 87% rename from mindspore/ccsrc/dataset/kernels/data/duplicate_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/data/duplicate_op.h index 4c9d6d36c93..60b2d8c33bc 100644 --- a/mindspore/ccsrc/dataset/kernels/data/duplicate_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/duplicate_op.h @@ -18,9 +18,10 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { @@ -36,6 +37,8 @@ class DuplicateOp : public TensorOp { Status Compute(const TensorRow &input, TensorRow *output) override; uint32_t NumOutput() override { return 2; } + + std::string Name() const override { return kDuplicateOp; } }; } // namespace dataset } // 
namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/data/fill_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/data/fill_op.cc similarity index 81% rename from mindspore/ccsrc/dataset/kernels/data/fill_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/data/fill_op.cc index 63895d3a956..f8dc746dffb 100644 --- a/mindspore/ccsrc/dataset/kernels/data/fill_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/fill_op.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/data/fill_op.h" +#include "minddata/dataset/kernels/data/fill_op.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/data/data_utils.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/data/data_utils.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/data/fill_op.h b/mindspore/ccsrc/minddata/dataset/kernels/data/fill_op.h similarity index 89% rename from mindspore/ccsrc/dataset/kernels/data/fill_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/data/fill_op.h index 03f59f3e67a..af0d9e7941d 100644 --- a/mindspore/ccsrc/dataset/kernels/data/fill_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/fill_op.h @@ -21,8 +21,8 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { @@ -35,6 +35,8 @@ class FillOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + std::string Name() const override { return kFillOp; } + private: std::shared_ptr fill_value_; }; diff --git a/mindspore/ccsrc/dataset/kernels/data/mask_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/data/mask_op.cc similarity 
index 91% rename from mindspore/ccsrc/dataset/kernels/data/mask_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/data/mask_op.cc index 2cfeb7e36fb..2dbe501a47c 100644 --- a/mindspore/ccsrc/dataset/kernels/data/mask_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/mask_op.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "dataset/kernels/data/mask_op.h" +#include "minddata/dataset/kernels/data/mask_op.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/data/mask_op.h b/mindspore/ccsrc/minddata/dataset/kernels/data/mask_op.h similarity index 85% rename from mindspore/ccsrc/dataset/kernels/data/mask_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/data/mask_op.h index 0affe543bbd..e6ac8c39645 100644 --- a/mindspore/ccsrc/dataset/kernels/data/mask_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/mask_op.h @@ -22,10 +22,10 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/kernels/data/type_cast_op.h" -#include "dataset/kernels/data/data_utils.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/kernels/data/type_cast_op.h" +#include "minddata/dataset/kernels/data/data_utils.h" namespace mindspore { namespace dataset { @@ -43,6 +43,8 @@ class MaskOp : public TensorOp { Status OutputType(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return kMaskOp; } + private: RelationalOp op_; std::shared_ptr value_; diff --git a/mindspore/ccsrc/dataset/kernels/data/one_hot_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/data/one_hot_op.cc similarity index 88% rename from mindspore/ccsrc/dataset/kernels/data/one_hot_op.cc rename to 
mindspore/ccsrc/minddata/dataset/kernels/data/one_hot_op.cc index 65d1a183b33..e2b7b74a965 100644 --- a/mindspore/ccsrc/dataset/kernels/data/one_hot_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/one_hot_op.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/data/one_hot_op.h" +#include "minddata/dataset/kernels/data/one_hot_op.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/data/data_utils.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/data/data_utils.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/data/one_hot_op.h b/mindspore/ccsrc/minddata/dataset/kernels/data/one_hot_op.h similarity index 90% rename from mindspore/ccsrc/dataset/kernels/data/one_hot_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/data/one_hot_op.h index 80494dc5c01..06a48235738 100644 --- a/mindspore/ccsrc/dataset/kernels/data/one_hot_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/one_hot_op.h @@ -20,8 +20,8 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { @@ -37,6 +37,8 @@ class OneHotOp : public TensorOp { Status OutputShape(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return kOneHotOp; } + private: int num_classes_; }; diff --git a/mindspore/ccsrc/dataset/kernels/data/pad_end_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/data/pad_end_op.cc similarity index 86% rename from mindspore/ccsrc/dataset/kernels/data/pad_end_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/data/pad_end_op.cc index 5b3b4cbe166..7b83137d884 100644 --- 
a/mindspore/ccsrc/dataset/kernels/data/pad_end_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/pad_end_op.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/data/pad_end_op.h" +#include "minddata/dataset/kernels/data/pad_end_op.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/data/data_utils.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/data/data_utils.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/data/pad_end_op.h b/mindspore/ccsrc/minddata/dataset/kernels/data/pad_end_op.h similarity index 90% rename from mindspore/ccsrc/dataset/kernels/data/pad_end_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/data/pad_end_op.h index c6bc0c430e8..c28f7250e0e 100644 --- a/mindspore/ccsrc/dataset/kernels/data/pad_end_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/pad_end_op.h @@ -20,8 +20,8 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { @@ -38,6 +38,8 @@ class PadEndOp : public TensorOp { Status OutputShape(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return kPadEndOp; } + private: TensorShape output_shape_; std::shared_ptr pad_val_; diff --git a/mindspore/ccsrc/dataset/kernels/data/slice_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/data/slice_op.cc similarity index 91% rename from mindspore/ccsrc/dataset/kernels/data/slice_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/data/slice_op.cc index 2eebf26e841..66f48d5c2b6 100644 --- a/mindspore/ccsrc/dataset/kernels/data/slice_op.cc +++ 
b/mindspore/ccsrc/minddata/dataset/kernels/data/slice_op.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/data/slice_op.h" +#include "minddata/dataset/kernels/data/slice_op.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/data/slice_op.h b/mindspore/ccsrc/minddata/dataset/kernels/data/slice_op.h similarity index 94% rename from mindspore/ccsrc/dataset/kernels/data/slice_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/data/slice_op.h index 0a24ae171ee..1cf99830c9e 100644 --- a/mindspore/ccsrc/dataset/kernels/data/slice_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/slice_op.h @@ -22,8 +22,8 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { @@ -71,6 +71,8 @@ class SliceOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + std::string Name() const override { return kSliceOp; } + private: // only on of the following will be valid // given indices to slice the Tensor. Empty vector if invalid. 
diff --git a/mindspore/ccsrc/dataset/kernels/data/to_float16_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/data/to_float16_op.cc similarity index 84% rename from mindspore/ccsrc/dataset/kernels/data/to_float16_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/data/to_float16_op.cc index 1cd79456e05..c52162b1aac 100644 --- a/mindspore/ccsrc/dataset/kernels/data/to_float16_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/to_float16_op.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/data/to_float16_op.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/data/data_utils.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/kernels/data/to_float16_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/data/data_utils.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/data/to_float16_op.h b/mindspore/ccsrc/minddata/dataset/kernels/data/to_float16_op.h similarity index 90% rename from mindspore/ccsrc/dataset/kernels/data/to_float16_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/data/to_float16_op.h index 3fca50bf07f..91f660ca9cf 100644 --- a/mindspore/ccsrc/dataset/kernels/data/to_float16_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/to_float16_op.h @@ -22,8 +22,8 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { @@ -42,6 +42,8 @@ class ToFloat16Op : public TensorOp { void Print(std::ostream &out) const override { out << "ToFloat16Op"; } Status OutputType(const std::vector &inputs, std::vector &outputs) override; + + std::string Name() const override { return kToFloat16Op; } }; } // namespace dataset } // 
namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/data/type_cast_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/data/type_cast_op.cc similarity index 85% rename from mindspore/ccsrc/dataset/kernels/data/type_cast_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/data/type_cast_op.cc index 74c84a668ab..5a58745293a 100644 --- a/mindspore/ccsrc/dataset/kernels/data/type_cast_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/type_cast_op.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/data/type_cast_op.h" +#include "minddata/dataset/kernels/data/type_cast_op.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/data/data_utils.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/data/data_utils.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/data/type_cast_op.h b/mindspore/ccsrc/minddata/dataset/kernels/data/type_cast_op.h similarity index 91% rename from mindspore/ccsrc/dataset/kernels/data/type_cast_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/data/type_cast_op.h index 1b3f2c3290c..b82bc32342a 100644 --- a/mindspore/ccsrc/dataset/kernels/data/type_cast_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/type_cast_op.h @@ -20,8 +20,8 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { @@ -42,6 +42,8 @@ class TypeCastOp : public TensorOp { void Print(std::ostream &out) const override { out << "TypeCastOp"; } Status OutputType(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return kTypeCastOp; } + private: DataType type_; }; 
diff --git a/mindspore/ccsrc/dataset/kernels/image/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt similarity index 95% rename from mindspore/ccsrc/dataset/kernels/image/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt index fef698912c4..c0c575de9af 100644 --- a/mindspore/ccsrc/dataset/kernels/image/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt @@ -15,7 +15,7 @@ add_library(kernels-image OBJECT random_crop_op.cc random_crop_with_bbox_op.cc random_horizontal_flip_op.cc - random_horizontal_flip_bbox_op.cc + random_horizontal_flip_with_bbox_op.cc bounding_box_augment_op.cc random_resize_op.cc random_rotation_op.cc diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/bounding_box_augment_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/bounding_box_augment_op.cc new file mode 100644 index 00000000000..618ed4d3560 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/bounding_box_augment_op.cc @@ -0,0 +1,76 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include "minddata/dataset/kernels/image/bounding_box_augment_op.h" +#include "minddata/dataset/kernels/image/resize_op.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/core/cv_tensor.h" + +namespace mindspore { +namespace dataset { +const float BoundingBoxAugmentOp::kDefRatio = 0.3; + +BoundingBoxAugmentOp::BoundingBoxAugmentOp(std::shared_ptr transform, float ratio) + : ratio_(ratio), uniform_(0, 1), transform_(std::move(transform)) { + rnd_.seed(GetSeed()); +} + +Status BoundingBoxAugmentOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + BOUNDING_BOX_CHECK(input); // check if bounding boxes are valid + uint32_t num_of_boxes = input[1]->shape()[0]; + std::shared_ptr crop_out; + std::shared_ptr res_out; + std::shared_ptr input_restore = CVTensor::AsCVTensor(input[0]); + for (uint32_t i = 0; i < num_of_boxes; i++) { + // using a uniform distribution to ensure op happens with probability ratio_ + if (uniform_(rnd_) < ratio_) { + float min_x = 0; + float min_y = 0; + float b_w = 0; + float b_h = 0; + // get the required items + RETURN_IF_NOT_OK(input[1]->GetItemAt(&min_x, {i, 0})); + RETURN_IF_NOT_OK(input[1]->GetItemAt(&min_y, {i, 1})); + RETURN_IF_NOT_OK(input[1]->GetItemAt(&b_w, {i, 2})); + RETURN_IF_NOT_OK(input[1]->GetItemAt(&b_h, {i, 3})); + RETURN_IF_NOT_OK(Crop(input_restore, &crop_out, static_cast(min_x), static_cast(min_y), + static_cast(b_w), static_cast(b_h))); + // transform the cropped bbox region + RETURN_IF_NOT_OK(transform_->Compute(crop_out, &res_out)); + // place the transformed region back in the restored input + std::shared_ptr res_img = CVTensor::AsCVTensor(res_out); + // check if transformed crop is out of bounds of the box + if (res_img->mat().cols > b_w || res_img->mat().rows > b_h || res_img->mat().cols < b_w || + res_img->mat().rows < b_h) { + // if so, resize to fit in the box + std::shared_ptr resize_op = + 
std::make_shared(static_cast(b_h), static_cast(b_w)); + RETURN_IF_NOT_OK(resize_op->Compute(std::static_pointer_cast(res_img), &res_out)); + res_img = CVTensor::AsCVTensor(res_out); + } + res_img->mat().copyTo(input_restore->mat()(cv::Rect(min_x, min_y, res_img->mat().cols, res_img->mat().rows))); + } + } + (*output).push_back(std::move(std::static_pointer_cast(input_restore))); + (*output).push_back(input[1]); + return Status::OK(); +} + +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/bounding_box_augment_op.h similarity index 85% rename from mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/bounding_box_augment_op.h index 6c106f75dc6..8e30c5738d8 100644 --- a/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/bounding_box_augment_op.h @@ -20,11 +20,12 @@ #include #include #include +#include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" -#include "dataset/util/random.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { @@ -50,9 +51,12 @@ class BoundingBoxAugmentOp : public TensorOp { Status Compute(const TensorRow &input, TensorRow *output) override; + std::string Name() const override { return kBoundingBoxAugmentOp; } + private: float ratio_; std::mt19937 rnd_; + std::uniform_real_distribution uniform_; std::shared_ptr transform_; }; } // namespace dataset diff --git a/mindspore/ccsrc/dataset/kernels/image/center_crop_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/center_crop_op.cc similarity index 93% rename from 
mindspore/ccsrc/dataset/kernels/image/center_crop_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/center_crop_op.cc index a5129e9c713..35079b05cd6 100644 --- a/mindspore/ccsrc/dataset/kernels/image/center_crop_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/center_crop_op.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/center_crop_op.h" +#include "minddata/dataset/kernels/image/center_crop_op.h" #include #include "common/utils.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/center_crop_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/center_crop_op.h similarity index 87% rename from mindspore/ccsrc/dataset/kernels/image/center_crop_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/center_crop_op.h index eb8e71ba7c7..1f8cbcf230c 100644 --- a/mindspore/ccsrc/dataset/kernels/image/center_crop_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/center_crop_op.h @@ -18,10 +18,11 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -39,6 +40,8 @@ class CenterCropOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; Status OutputShape(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return kCenterCropOp; } + private: int32_t crop_het_; int32_t 
crop_wid_; diff --git a/mindspore/ccsrc/dataset/kernels/image/cut_out_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/cut_out_op.cc similarity index 86% rename from mindspore/ccsrc/dataset/kernels/image/cut_out_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/cut_out_op.cc index 74d9df5d6b2..578138d427a 100644 --- a/mindspore/ccsrc/dataset/kernels/image/cut_out_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/cut_out_op.cc @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/cut_out_op.h" +#include "minddata/dataset/kernels/image/cut_out_op.h" #include -#include "dataset/core/config_manager.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/cut_out_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/cut_out_op.h similarity index 91% rename from mindspore/ccsrc/dataset/kernels/image/cut_out_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/cut_out_op.h index 2198f23e441..263cbdb27c3 100644 --- a/mindspore/ccsrc/dataset/kernels/image/cut_out_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/cut_out_op.h @@ -22,10 +22,10 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" 
namespace mindspore { namespace dataset { @@ -61,6 +61,8 @@ class CutOutOp : public TensorOp { // @return Status - The error code return Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + std::string Name() const override { return kCutOutOp; } + private: std::mt19937 rnd_; int32_t box_height_; diff --git a/mindspore/ccsrc/dataset/kernels/image/decode_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/decode_op.cc similarity index 92% rename from mindspore/ccsrc/dataset/kernels/image/decode_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/decode_op.cc index ef6cf88b3b7..5bc5377de95 100644 --- a/mindspore/ccsrc/dataset/kernels/image/decode_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/decode_op.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/decode_op.h" +#include "minddata/dataset/kernels/image/decode_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/decode_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/decode_op.h similarity index 87% rename from mindspore/ccsrc/dataset/kernels/image/decode_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/decode_op.h index 6e7180958a3..29bf1d0146d 100644 --- a/mindspore/ccsrc/dataset/kernels/image/decode_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/decode_op.h @@ -18,10 +18,11 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -40,6 
+41,8 @@ class DecodeOp : public TensorOp { Status OutputShape(const std::vector &inputs, std::vector &outputs) override; Status OutputType(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return kDecodeOp; } + private: bool is_rgb_format_ = true; }; diff --git a/mindspore/ccsrc/dataset/kernels/image/hwc_to_chw_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/hwc_to_chw_op.cc similarity index 89% rename from mindspore/ccsrc/dataset/kernels/image/hwc_to_chw_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/hwc_to_chw_op.cc index 8ed2229cd1d..5013958562e 100644 --- a/mindspore/ccsrc/dataset/kernels/image/hwc_to_chw_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/hwc_to_chw_op.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/hwc_to_chw_op.h" +#include "minddata/dataset/kernels/image/hwc_to_chw_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/hwc_to_chw_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/hwc_to_chw_op.h similarity index 85% rename from mindspore/ccsrc/dataset/kernels/image/hwc_to_chw_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/hwc_to_chw_op.h index 825ffa4443c..0d5f70f8952 100644 --- a/mindspore/ccsrc/dataset/kernels/image/hwc_to_chw_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/hwc_to_chw_op.h @@ -18,10 +18,11 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { 
namespace dataset { @@ -31,6 +32,8 @@ class HwcToChwOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; Status OutputShape(const std::vector &inputs, std::vector &outputs) override; + + std::string Name() const override { return kHwcToChwOp; } }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/image_utils.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc similarity index 92% rename from mindspore/ccsrc/dataset/kernels/image/image_utils.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc index ded9a8db114..ddbce3e23ac 100644 --- a/mindspore/ccsrc/dataset/kernels/image/image_utils.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/image_utils.h" +#include "minddata/dataset/kernels/image/image_utils.h" #include #include #include @@ -21,11 +21,11 @@ #include #include #include "common/utils.h" -#include "dataset/core/constants.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/util/random.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/util/random.h" #define MAX_INT_PRECISION 16777216 // float int precision is 16777216 namespace mindspore { @@ -121,14 +121,14 @@ Status Resize(const std::shared_ptr &input, std::shared_ptr *out } } -bool HasJpegMagic(const std::shared_ptr &input) { +bool IsNonEmptyJPEG(const std::shared_ptr &input) { const unsigned char *kJpegMagic = (unsigned char *)"\xFF\xD8\xFF"; constexpr size_t kJpegMagicLen = 3; - return input->SizeInBytes() >= kJpegMagicLen && memcmp(input->GetBuffer(), 
kJpegMagic, kJpegMagicLen) == 0; + return input->SizeInBytes() > kJpegMagicLen && memcmp(input->GetBuffer(), kJpegMagic, kJpegMagicLen) == 0; } Status Decode(const std::shared_ptr &input, std::shared_ptr *output) { - if (HasJpegMagic(input)) { + if (IsNonEmptyJPEG(input)) { return JpegCropAndDecode(input, output); } else { return DecodeCv(input, output); @@ -311,7 +311,7 @@ Status JpegCropAndDecode(const std::shared_ptr &input, std::shared_ptr(ts, DataType(DataType::DE_UINT8)); const int buffer_size = output_tensor->SizeInBytes(); - JSAMPLE *buffer = static_cast(reinterpret_cast(&(*output_tensor->begin()))); + JSAMPLE *buffer = reinterpret_cast(&(*output_tensor->begin())); const int max_scanlines_to_read = skipped_scanlines + crop_h; // stride refers to output tensor, which has 3 components at most const int stride = crop_w * kOutNumComponents; @@ -729,7 +729,6 @@ Status Pad(const std::shared_ptr &input, std::shared_ptr *output int num_channels = input_cv->shape()[2]; if (input_cv->Rank() == 3 && num_channels == 1 && output_cv->Rank() == 2) output_cv->ExpandDim(2); *output = std::static_pointer_cast(output_cv); - return Status::OK(); } catch (const cv::Exception &e) { RETURN_STATUS_UNEXPECTED("Unexpected error in pad"); @@ -740,22 +739,16 @@ Status UpdateBBoxesForCrop(std::shared_ptr *bboxList, size_t *bboxCount, int CB_Ymax) { // PASS LIST, COUNT OF BOUNDING BOXES // Also PAss X/Y Min/Max of image cropped region - normally obtained from 'GetCropBox' functions - uint32_t bb_Xmin_t, bb_Ymin_t, bb_Xmax_t, bb_Ymax_t; - + float bb_Xmin = 0.0, bb_Ymin = 0.0, bb_Xmax = 0.0, bb_Ymax = 0.0; std::vector correct_ind; - std::vector copyVals; + std::vector copyVals; dsize_t bboxDim = (*bboxList)->shape()[1]; bool retFlag = false; // true unless overlap found for (int i = 0; i < *bboxCount; i++) { - int bb_Xmin, bb_Xmax, bb_Ymin, bb_Ymax; - RETURN_IF_NOT_OK((*bboxList)->GetUnsignedIntAt(&bb_Xmin_t, {i, 0})); - RETURN_IF_NOT_OK((*bboxList)->GetUnsignedIntAt(&bb_Ymin_t, {i, 1})); 
- RETURN_IF_NOT_OK((*bboxList)->GetUnsignedIntAt(&bb_Xmax_t, {i, 2})); - RETURN_IF_NOT_OK((*bboxList)->GetUnsignedIntAt(&bb_Ymax_t, {i, 3})); - bb_Xmin = bb_Xmin_t; - bb_Ymin = bb_Ymin_t; - bb_Xmax = bb_Xmax_t; - bb_Ymax = bb_Ymax_t; + RETURN_IF_NOT_OK((*bboxList)->GetItemAt(&bb_Xmin, {i, 0})); + RETURN_IF_NOT_OK((*bboxList)->GetItemAt(&bb_Ymin, {i, 1})); + RETURN_IF_NOT_OK((*bboxList)->GetItemAt(&bb_Xmax, {i, 2})); + RETURN_IF_NOT_OK((*bboxList)->GetItemAt(&bb_Ymax, {i, 3})); bb_Xmax = bb_Xmin + bb_Xmax; bb_Ymax = bb_Ymin + bb_Ymax; // check for image / BB overlap @@ -766,23 +759,31 @@ Status UpdateBBoxesForCrop(std::shared_ptr *bboxList, size_t *bboxCount, correct_ind.push_back(i); // adjust BBox corners by bringing into new CropBox if beyond // Also reseting/adjusting for boxes to lie within CropBox instead of Image - subtract CropBox Xmin/YMin - bb_Xmin = bb_Xmin - (std::min(0, (bb_Xmin - CB_Xmin)) + CB_Xmin); - bb_Xmax = bb_Xmax - (std::max(0, (bb_Xmax - CB_Xmax)) + CB_Xmin); - bb_Ymin = bb_Ymin - (std::min(0, (bb_Ymin - CB_Ymin)) + CB_Ymin); - bb_Ymax = bb_Ymax - (std::max(0, (bb_Ymax - CB_Ymax)) + CB_Ymin); + + bb_Xmin = bb_Xmin - std::min(static_cast(0.0), (bb_Xmin - CB_Xmin)) - CB_Xmin; + bb_Xmax = bb_Xmax - std::max(static_cast(0.0), (bb_Xmax - CB_Xmax)) - CB_Xmin; + bb_Ymin = bb_Ymin - std::min(static_cast(0.0), (bb_Ymin - CB_Ymin)) - CB_Ymin; + bb_Ymax = bb_Ymax - std::max(static_cast(0.0), (bb_Ymax - CB_Ymax)) - CB_Ymin; + + // bound check for float values + bb_Xmin = std::max(bb_Xmin, static_cast(0)); + bb_Ymin = std::max(bb_Ymin, static_cast(0)); + bb_Xmax = std::min(bb_Xmax, static_cast(CB_Xmax - CB_Xmin)); // find max value relative to new image + bb_Ymax = std::min(bb_Ymax, static_cast(CB_Ymax - CB_Ymin)); + // reset min values and calculate width/height from Box corners - RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 0}, static_cast(bb_Xmin))); - RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 1}, static_cast(bb_Ymin))); - 
RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 2}, static_cast(bb_Xmax - bb_Xmin))); - RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 3}, static_cast(bb_Ymax - bb_Ymin))); + RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 0}, bb_Xmin)); + RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 1}, bb_Ymin)); + RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 2}, bb_Xmax - bb_Xmin)); + RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 3}, bb_Ymax - bb_Ymin)); } // create new tensor and copy over bboxes still valid to the image // bboxes outside of new cropped region are ignored - empty tensor returned in case of none *bboxCount = correct_ind.size(); - uint32_t temp; + float temp = 0.0; for (auto slice : correct_ind) { // for every index in the loop for (int ix = 0; ix < bboxDim; ix++) { - RETURN_IF_NOT_OK((*bboxList)->GetUnsignedIntAt(&temp, {slice, ix})); + RETURN_IF_NOT_OK((*bboxList)->GetItemAt(&temp, {slice, ix})); copyVals.push_back(temp); } } @@ -794,11 +795,11 @@ Status UpdateBBoxesForCrop(std::shared_ptr *bboxList, size_t *bboxCount, Status PadBBoxes(const std::shared_ptr *bboxList, const size_t &bboxCount, int32_t pad_top, int32_t pad_left) { for (int i = 0; i < bboxCount; i++) { - uint32_t xMin, yMin; - RETURN_IF_NOT_OK((*bboxList)->GetUnsignedIntAt(&xMin, {i, 0})); - RETURN_IF_NOT_OK((*bboxList)->GetUnsignedIntAt(&yMin, {i, 1})); - xMin += static_cast(pad_left); // should not be negative - yMin += static_cast(pad_top); + float xMin = 0.0, yMin = 0.0; + RETURN_IF_NOT_OK((*bboxList)->GetItemAt(&xMin, {i, 0})); + RETURN_IF_NOT_OK((*bboxList)->GetItemAt(&yMin, {i, 1})); + xMin += pad_left; + yMin += pad_top; RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 0}, xMin)); RETURN_IF_NOT_OK((*bboxList)->SetItemAt({i, 1}, yMin)); } @@ -807,16 +808,16 @@ Status PadBBoxes(const std::shared_ptr *bboxList, const size_t &bboxCoun Status UpdateBBoxesForResize(const std::shared_ptr &bboxList, const size_t &bboxCount, int32_t target_width_, int32_t target_height_, int orig_width, int orig_height) { - uint32_t 
bb_Xmin, bb_Ymin, bb_Xwidth, bb_Ywidth; - // cast to float to preseve fractional - double W_aspRatio = (target_width_ * 1.0) / (orig_width * 1.0); - double H_aspRatio = (target_height_ * 1.0) / (orig_height * 1.0); + float bb_Xmin = 0, bb_Ymin = 0, bb_Xwidth = 0, bb_Ywidth = 0; + // cast to float to preserve fractional + float W_aspRatio = (target_width_ * 1.0) / (orig_width * 1.0); + float H_aspRatio = (target_height_ * 1.0) / (orig_height * 1.0); for (int i = 0; i < bboxCount; i++) { // for each bounding box - RETURN_IF_NOT_OK(bboxList->GetUnsignedIntAt(&bb_Xmin, {i, 0})); - RETURN_IF_NOT_OK(bboxList->GetUnsignedIntAt(&bb_Ymin, {i, 1})); - RETURN_IF_NOT_OK(bboxList->GetUnsignedIntAt(&bb_Xwidth, {i, 2})); - RETURN_IF_NOT_OK(bboxList->GetUnsignedIntAt(&bb_Ywidth, {i, 3})); + RETURN_IF_NOT_OK(bboxList->GetItemAt(&bb_Xmin, {i, 0})); + RETURN_IF_NOT_OK(bboxList->GetItemAt(&bb_Ymin, {i, 1})); + RETURN_IF_NOT_OK(bboxList->GetItemAt(&bb_Xwidth, {i, 2})); + RETURN_IF_NOT_OK(bboxList->GetItemAt(&bb_Ywidth, {i, 3})); // update positions and widths bb_Xmin = bb_Xmin * W_aspRatio; bb_Ymin = bb_Ymin * H_aspRatio; diff --git a/mindspore/ccsrc/dataset/kernels/image/image_utils.h b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.h similarity index 97% rename from mindspore/ccsrc/dataset/kernels/image/image_utils.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.h index 57ffce6a121..f489c7367b9 100644 --- a/mindspore/ccsrc/dataset/kernels/image/image_utils.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.h @@ -29,16 +29,12 @@ #include "./jpeglib.h" #include "./jerror.h" #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { -enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic 
= 2, kArea = 3 }; - -enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 }; - void JpegErrorExitCustom(j_common_ptr cinfo); struct JpegErrorManagerCustom { @@ -96,7 +92,7 @@ Status Decode(const std::shared_ptr &input, std::shared_ptr *out Status DecodeCv(const std::shared_ptr &input, std::shared_ptr *output); -bool HasJpegMagic(const std::shared_ptr &input); +bool IsNonEmptyJPEG(const std::shared_ptr &input); void JpegSetSource(j_decompress_ptr c_info, const void *data, int64_t data_size); diff --git a/mindspore/ccsrc/dataset/kernels/image/normalize_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/normalize_op.cc similarity index 89% rename from mindspore/ccsrc/dataset/kernels/image/normalize_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/normalize_op.cc index 638eaad264a..de5deb31efa 100644 --- a/mindspore/ccsrc/dataset/kernels/image/normalize_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/normalize_op.cc @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/kernels/image/normalize_op.h" +#include "minddata/dataset/kernels/image/normalize_op.h" #include -#include "dataset/core/cv_tensor.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/normalize_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/normalize_op.h similarity index 83% rename from mindspore/ccsrc/dataset/kernels/image/normalize_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/normalize_op.h index 7aa6fa69bd7..7821869c8f8 100644 --- a/mindspore/ccsrc/dataset/kernels/image/normalize_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/normalize_op.h @@ -17,11 +17,12 @@ #define DATASET_KERNELS_IMAGE_NORMALIZE_OP_H_ #include +#include -#include "dataset/core/cv_tensor.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -35,6 +36,8 @@ class NormalizeOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + std::string Name() const override { return kNormalizeOp; } + private: std::shared_ptr mean_; std::shared_ptr std_; diff --git a/mindspore/ccsrc/dataset/kernels/image/pad_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/pad_op.cc similarity index 91% rename from mindspore/ccsrc/dataset/kernels/image/pad_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/pad_op.cc index b4d9c2bbf0b..52f32e2b1b9 100644 --- a/mindspore/ccsrc/dataset/kernels/image/pad_op.cc +++ 
b/mindspore/ccsrc/minddata/dataset/kernels/image/pad_op.cc @@ -13,10 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/pad_op.h" +#include "minddata/dataset/kernels/image/pad_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/pad_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/pad_op.h similarity index 90% rename from mindspore/ccsrc/dataset/kernels/image/pad_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/pad_op.h index 76d99d01626..94370584066 100644 --- a/mindspore/ccsrc/dataset/kernels/image/pad_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/pad_op.h @@ -18,11 +18,12 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -53,6 +54,8 @@ class PadOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; Status OutputShape(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return kPadOp; } + private: int32_t pad_top_; int32_t pad_bottom_; diff --git a/mindspore/ccsrc/dataset/kernels/image/random_color_adjust_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_color_adjust_op.cc similarity index 93% rename from mindspore/ccsrc/dataset/kernels/image/random_color_adjust_op.cc rename to 
mindspore/ccsrc/minddata/dataset/kernels/image/random_color_adjust_op.cc index e420f86e9a2..6dbf30c33e3 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_color_adjust_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_color_adjust_op.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/random_color_adjust_op.h" +#include "minddata/dataset/kernels/image/random_color_adjust_op.h" #include -#include "dataset/core/config_manager.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_color_adjust_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_color_adjust_op.h similarity index 93% rename from mindspore/ccsrc/dataset/kernels/image/random_color_adjust_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_color_adjust_op.h index 74d1ec450b1..fb29b570629 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_color_adjust_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_color_adjust_op.h @@ -21,9 +21,9 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -57,6 +57,8 @@ class RandomColorAdjustOp : public TensorOp { // @return Status - The error code return. 
Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + std::string Name() const override { return kRandomColorAdjustOp; } + private: std::mt19937 rnd_; float bright_factor_start_; diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.cc similarity index 95% rename from mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.cc index c5b5f20c638..8a7364d6667 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/random_crop_and_resize_op.h" +#include "minddata/dataset/kernels/image/random_crop_and_resize_op.h" #include -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.h similarity index 84% rename from mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.h index db805a9374a..41d775fdf7c 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.h @@ -19,11 +19,12 @@ #include #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/image/image_utils.h" -#include 
"dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -41,6 +42,12 @@ class RandomCropAndResizeOp : public TensorOp { float scale_ub = kDefScaleUb, float aspect_lb = kDefAspectLb, float aspect_ub = kDefAspectUb, InterpolationMode interpolation = kDefInterpolation, int32_t max_iter = kDefMaxIter); + RandomCropAndResizeOp() = default; + + RandomCropAndResizeOp(const RandomCropAndResizeOp &rhs) = default; + + RandomCropAndResizeOp(RandomCropAndResizeOp &&rhs) = default; + ~RandomCropAndResizeOp() override = default; void Print(std::ostream &out) const override { @@ -52,6 +59,8 @@ class RandomCropAndResizeOp : public TensorOp { Status GetCropBox(int h_in, int w_in, int *x, int *y, int *crop_height, int *crop_width); + std::string Name() const override { return kRandomCropAndResizeOp; } + protected: int32_t target_height_; int32_t target_width_; diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_with_bbox_op.cc similarity index 89% rename from mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_with_bbox_op.cc index fbaf2c9326d..98bfe412410 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_with_bbox_op.cc @@ -17,10 +17,10 @@ #include #include -#include "dataset/util/random.h" -#include "dataset/util/status.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/kernels/image/random_crop_and_resize_with_bbox_op.h" +#include "minddata/dataset/util/random.h" +#include 
"minddata/dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h similarity index 92% rename from mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h index 9675d43933b..ddaac10fac2 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h @@ -16,7 +16,8 @@ #ifndef DATASET_KERNELS_IMAGE_RANDOM_CROP_AND_RESIZE_WITH_BBOX_OP_H_ #define DATASET_KERNELS_IMAGE_RANDOM_CROP_AND_RESIZE_WITH_BBOX_OP_H_ -#include "dataset/kernels/image/random_crop_and_resize_op.h" +#include "minddata/dataset/kernels/image/random_crop_and_resize_op.h" +#include namespace mindspore { namespace dataset { @@ -39,6 +40,8 @@ class RandomCropAndResizeWithBBoxOp : public RandomCropAndResizeOp { } Status Compute(const TensorRow &input, TensorRow *output) override; + + std::string Name() const override { return kRandomCropAndResizeWithBBoxOp; } }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_decode_resize_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_decode_resize_op.cc similarity index 90% rename from mindspore/ccsrc/dataset/kernels/image/random_crop_decode_resize_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_decode_resize_op.cc index 74aa91ea7ee..d62aebd37f9 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_decode_resize_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_decode_resize_op.cc @@ -13,11 
+13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/random_crop_decode_resize_op.h" +#include "minddata/dataset/kernels/image/random_crop_decode_resize_op.h" #include -#include "dataset/kernels/image/image_utils.h" -#include "dataset/core/config_manager.h" -#include "dataset/kernels/image/decode_op.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/kernels/image/decode_op.h" namespace mindspore { namespace dataset { @@ -31,7 +31,7 @@ Status RandomCropDecodeResizeOp::Compute(const std::shared_ptr &input, s if (input == nullptr) { RETURN_STATUS_UNEXPECTED("input tensor is null"); } - if (!HasJpegMagic(input)) { + if (!IsNonEmptyJPEG(input)) { DecodeOp op(true); std::shared_ptr decoded; RETURN_IF_NOT_OK(op.Compute(input, &decoded)); diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_decode_resize_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_decode_resize_op.h similarity index 78% rename from mindspore/ccsrc/dataset/kernels/image/random_crop_decode_resize_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_decode_resize_op.h index 95661699464..863fd48c148 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_decode_resize_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_decode_resize_op.h @@ -20,12 +20,12 @@ #include #include #include -#include "dataset/core/tensor.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/kernels/image/random_crop_and_resize_op.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/kernels/image/random_crop_and_resize_op.h" 
+#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -35,6 +35,8 @@ class RandomCropDecodeResizeOp : public RandomCropAndResizeOp { float scale_ub = kDefScaleUb, float aspect_lb = kDefAspectLb, float aspect_ub = kDefAspectUb, InterpolationMode interpolation = kDefInterpolation, int32_t max_iter = kDefMaxIter); + explicit RandomCropDecodeResizeOp(const RandomCropAndResizeOp &rhs) : RandomCropAndResizeOp(rhs) {} + ~RandomCropDecodeResizeOp() override = default; void Print(std::ostream &out) const override { @@ -43,6 +45,8 @@ class RandomCropDecodeResizeOp : public RandomCropAndResizeOp { } Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + + std::string Name() const override { return kRandomCropDecodeResizeOp; } }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/kernels/image/random_crop_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.cc index 110d769f260..51772e9ec39 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/kernels/image/random_crop_op.h" +#include "minddata/dataset/kernels/image/random_crop_op.h" #include -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.h similarity index 91% rename from mindspore/ccsrc/dataset/kernels/image/random_crop_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.h index cd43ec1efb0..44f1789f9d8 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.h @@ -19,11 +19,12 @@ #include #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -45,6 +46,10 @@ class RandomCropOp : public TensorOp { BorderType border_types = kDefBorderType, bool pad_if_needed = kDefPadIfNeeded, uint8_t fill_r = kDefFillR, uint8_t fill_g = kDefFillG, uint8_t fill_b = kDefFillB); + RandomCropOp(const RandomCropOp &rhs) = default; + + RandomCropOp(RandomCropOp &&rhs) = default; + ~RandomCropOp() override = default; void Print(std::ostream &out) const override { out << "RandomCropOp: " << crop_height_ << " " << crop_width_; } @@ -72,6 +77,8 @@ class RandomCropOp : public TensorOp { Status OutputShape(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return 
kRandomCropOp; } + protected: int32_t crop_height_ = 0; int32_t crop_width_ = 0; diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_with_bbox_op.cc similarity index 91% rename from mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_with_bbox_op.cc index c873307afdd..08b12b8b706 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_with_bbox_op.cc @@ -18,10 +18,10 @@ #include #include -#include "dataset/kernels/image/random_crop_with_bbox_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/random_crop_with_bbox_op.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_with_bbox_op.h similarity index 93% rename from mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_with_bbox_op.h index 88a58d35574..bfcd1610d3a 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_with_bbox_op.h @@ -18,8 +18,9 @@ #include #include +#include -#include "dataset/kernels/image/random_crop_op.h" +#include "minddata/dataset/kernels/image/random_crop_op.h" namespace mindspore { namespace dataset { @@ -41,6 +42,8 @@ class RandomCropWithBBoxOp : public RandomCropOp { } Status Compute(const TensorRow &input, TensorRow *output) override; + + std::string Name() const override { return 
kRandomCropWithBBoxOp; } }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_op.cc similarity index 85% rename from mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_op.cc index ae76e1bf591..5e8ab8a6347 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_op.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/random_horizontal_flip_op.h" +#include "minddata/dataset/kernels/image/random_horizontal_flip_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_op.h similarity index 86% rename from mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_op.h index efea1245334..9e089291805 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_op.h @@ -18,11 +18,12 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" 
namespace mindspore { namespace dataset { @@ -47,6 +48,8 @@ class RandomHorizontalFlipOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + std::string Name() const override { return kRandomHorizontalFlipOp; } + private: std::mt19937 rnd_; std::bernoulli_distribution distribution_; diff --git a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_with_bbox_op.cc similarity index 68% rename from mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_with_bbox_op.cc index 5a5c632e81d..809f564b188 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_with_bbox_op.cc @@ -14,11 +14,10 @@ * limitations under the License. */ #include -#include "dataset/kernels/image/random_horizontal_flip_bbox_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/core/pybind_support.h" +#include "minddata/dataset/kernels/image/random_horizontal_flip_with_bbox_op.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/core/cv_tensor.h" namespace mindspore { namespace dataset { @@ -31,21 +30,19 @@ Status RandomHorizontalFlipWithBBoxOp::Compute(const TensorRow &input, TensorRow // To test bounding boxes algorithm, create random bboxes from image dims size_t num_of_boxes = input[1]->shape()[0]; // set to give number of bboxes float img_center = (input[0]->shape()[1] / 2.); // get the center of the image - for (int i = 0; i < num_of_boxes; i++) { - uint32_t b_w = 0; // bounding box width - uint32_t min_x = 0; + float b_w = 0; // bounding box width + float min_x = 0; // get the required items 
- input[1]->GetItemAt(&min_x, {i, 0}); - input[1]->GetItemAt(&b_w, {i, 2}); + RETURN_IF_NOT_OK(input[1]->GetItemAt(&min_x, {i, 0})); + RETURN_IF_NOT_OK(input[1]->GetItemAt(&b_w, {i, 2})); // do the flip - float diff = img_center - min_x; // get distance from min_x to center - uint32_t refl_min_x = diff + img_center; // get reflection of min_x - uint32_t new_min_x = refl_min_x - b_w; // subtract from the reflected min_x to get the new one - input[1]->SetItemAt({i, 0}, new_min_x); + float diff = img_center - min_x; // get distance from min_x to center + float refl_min_x = diff + img_center; // get reflection of min_x + float new_min_x = refl_min_x - b_w; // subtract from the reflected min_x to get the new one + RETURN_IF_NOT_OK(input[1]->SetItemAt({i, 0}, new_min_x)); } - (*output).push_back(nullptr); - (*output).push_back(nullptr); + (*output).resize(2); // move input to output pointer of bounding boxes (*output)[1] = std::move(input[1]); // perform HorizontalFlip on the image @@ -55,6 +52,5 @@ Status RandomHorizontalFlipWithBBoxOp::Compute(const TensorRow &input, TensorRow *output = input; return Status::OK(); } - } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_with_bbox_op.h similarity index 86% rename from mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_with_bbox_op.h index 06c96e11ae9..d98669ea139 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_horizontal_flip_with_bbox_op.h @@ -16,18 +16,15 @@ #ifndef DATASET_KERNELS_IMAGE_RANDOM_HORIZONTAL_FLIP_BBOX_OP_H_ #define DATASET_KERNELS_IMAGE_RANDOM_HORIZONTAL_FLIP_BBOX_OP_H_ -#include -#include #include #include #include +#include #include -#include 
"dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" -#include "pybind11/pybind11.h" -#include "pybind11/stl_bind.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -52,6 +49,8 @@ class RandomHorizontalFlipWithBBoxOp : public TensorOp { Status Compute(const TensorRow &input, TensorRow *output) override; + std::string Name() const override { return kRandomHorizontalFlipWithBBoxOp; } + private: std::mt19937 rnd_; std::bernoulli_distribution distribution_; diff --git a/mindspore/ccsrc/dataset/kernels/image/random_resize_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_op.cc similarity index 81% rename from mindspore/ccsrc/dataset/kernels/image/random_resize_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_op.cc index c14224a9308..8736f0a6a5b 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_resize_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_op.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/kernels/image/random_resize_op.h" +#include "minddata/dataset/kernels/image/random_resize_op.h" #include -#include "dataset/core/config_manager.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_resize_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_op.h similarity index 83% rename from mindspore/ccsrc/dataset/kernels/image/random_resize_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_op.h index af23803d4cb..8b2b067751b 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_resize_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_op.h @@ -18,12 +18,13 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/image/resize_op.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/image/resize_op.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -45,6 +46,8 @@ class RandomResizeOp : public ResizeOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + std::string Name() const override { return kRandomResizeOp; } + private: std::mt19937 random_generator_; std::uniform_int_distribution distribution_{0, 3}; diff --git a/mindspore/ccsrc/dataset/kernels/image/random_resize_with_bbox_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_with_bbox_op.cc similarity index 86% rename 
from mindspore/ccsrc/dataset/kernels/image/random_resize_with_bbox_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_with_bbox_op.cc index de69c02e39e..e099b78a0f7 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_resize_with_bbox_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_with_bbox_op.cc @@ -14,9 +14,9 @@ * limitations under the License. */ -#include "dataset/kernels/image/random_resize_with_bbox_op.h" -#include "dataset/kernels/image/resize_with_bbox_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/random_resize_with_bbox_op.h" +#include "minddata/dataset/kernels/image/resize_with_bbox_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_resize_with_bbox_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_with_bbox_op.h similarity index 81% rename from mindspore/ccsrc/dataset/kernels/image/random_resize_with_bbox_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_with_bbox_op.h index 4a7614525fb..6bad0d30fa8 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_resize_with_bbox_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_resize_with_bbox_op.h @@ -19,13 +19,14 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/image/resize_op.h" -#include "dataset/kernels/image/resize_with_bbox_op.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/image/resize_op.h" +#include "minddata/dataset/kernels/image/resize_with_bbox_op.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -46,6 +47,8 @@ class RandomResizeWithBBoxOp 
: public ResizeWithBBoxOp { Status Compute(const TensorRow &input, TensorRow *output) override; + std::string Name() const override { return kRandomResizeWithBBoxOp; } + private: std::mt19937 random_generator_; std::uniform_int_distribution distribution_{0, 3}; diff --git a/mindspore/ccsrc/dataset/kernels/image/random_rotation_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_rotation_op.cc similarity index 92% rename from mindspore/ccsrc/dataset/kernels/image/random_rotation_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_rotation_op.cc index 65e024865b2..b2cb4facae7 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_rotation_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_rotation_op.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/random_rotation_op.h" +#include "minddata/dataset/kernels/image/random_rotation_op.h" #include -#include "dataset/core/cv_tensor.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/random.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_rotation_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_rotation_op.h similarity index 92% rename from mindspore/ccsrc/dataset/kernels/image/random_rotation_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_rotation_op.h index d30cd242882..ea679ccb565 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_rotation_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_rotation_op.h @@ -19,11 +19,12 @@ #include #include #include +#include -#include "dataset/core/tensor.h" -#include 
"dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" -#include "dataset/kernels/image/image_utils.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" namespace mindspore { namespace dataset { @@ -68,6 +69,8 @@ class RandomRotationOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; Status OutputShape(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return kRandomRotationOp; } + private: float degree_start_; float degree_end_; diff --git a/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_op.cc similarity index 85% rename from mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_op.cc index 096923a9ecd..24d816ef1a0 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_op.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "dataset/kernels/image/random_vertical_flip_op.h" +#include "minddata/dataset/kernels/image/random_vertical_flip_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_op.h similarity index 85% rename from mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_op.h index 18693bc0eb2..cee5869c71a 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_op.h @@ -18,11 +18,12 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" -#include "dataset/util/random.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { @@ -41,6 +42,8 @@ class RandomVerticalFlipOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + std::string Name() const override { return kRandomVerticalFlipOp; } + private: std::mt19937 rnd_; std::bernoulli_distribution distribution_; diff --git a/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_with_bbox_op.cc similarity index 77% rename from mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_with_bbox_op.cc index ffea851eac1..7d2fa7bab50 100644 --- 
a/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_with_bbox_op.cc @@ -16,9 +16,9 @@ #include -#include "dataset/util/status.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/kernels/image/random_vertical_flip_with_bbox_op.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/kernels/image/random_vertical_flip_with_bbox_op.h" namespace mindspore { namespace dataset { @@ -34,14 +34,13 @@ Status RandomVerticalFlipWithBBoxOp::Compute(const TensorRow &input, TensorRow * // one time allocation -> updated in the loop // type defined based on VOC test dataset for (int i = 0; i < boxCount; i++) { - uint32_t boxCorner_y = 0; - uint32_t boxHeight = 0; - uint32_t newBoxCorner_y = 0; - RETURN_IF_NOT_OK(input[1]->GetUnsignedIntAt(&boxCorner_y, {i, 1})); // get min y of bbox - RETURN_IF_NOT_OK(input[1]->GetUnsignedIntAt(&boxHeight, {i, 3})); // get height of bbox + float boxCorner_y = 0.0, boxHeight = 0.0; + float newBoxCorner_y = 0.0; + RETURN_IF_NOT_OK(input[1]->GetItemAt(&boxCorner_y, {i, 1})); // get min y of bbox + RETURN_IF_NOT_OK(input[1]->GetItemAt(&boxHeight, {i, 3})); // get height of bbox // subtract (curCorner + height) from (max) for new Corner position - newBoxCorner_y = (imHeight - 1) - ((boxCorner_y + boxHeight) - 1); + newBoxCorner_y = (imHeight - 1.0) - ((boxCorner_y + boxHeight) - 1.0); RETURN_IF_NOT_OK(input[1]->SetItemAt({i, 1}, newBoxCorner_y)); } diff --git a/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_with_bbox_op.h similarity index 85% rename from mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_with_bbox_op.h index 4764cc2b752..c9f19f52175 100644 --- 
a/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_vertical_flip_with_bbox_op.h @@ -18,11 +18,12 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" -#include "dataset/util/random.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { @@ -42,6 +43,8 @@ class RandomVerticalFlipWithBBoxOp : public TensorOp { Status Compute(const TensorRow &input, TensorRow *output) override; + std::string Name() const override { return kRandomVerticalFlipWithBBoxOp; } + private: std::mt19937 rnd_; std::bernoulli_distribution distribution_; diff --git a/mindspore/ccsrc/dataset/kernels/image/rescale_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/rescale_op.cc similarity index 87% rename from mindspore/ccsrc/dataset/kernels/image/rescale_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/rescale_op.cc index fd1807991c0..2a500d6c34a 100644 --- a/mindspore/ccsrc/dataset/kernels/image/rescale_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/rescale_op.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/kernels/image/rescale_op.h" +#include "minddata/dataset/kernels/image/rescale_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/rescale_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/rescale_op.h similarity index 87% rename from mindspore/ccsrc/dataset/kernels/image/rescale_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/rescale_op.h index 8aee75b0c1b..c70b7bf6cff 100644 --- a/mindspore/ccsrc/dataset/kernels/image/rescale_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/rescale_op.h @@ -18,10 +18,11 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -38,6 +39,8 @@ class RescaleOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; Status OutputType(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return kRescaleOp; } + private: float rescale_; float shift_; diff --git a/mindspore/ccsrc/dataset/kernels/image/resize_bilinear_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_bilinear_op.cc similarity index 89% rename from mindspore/ccsrc/dataset/kernels/image/resize_bilinear_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/resize_bilinear_op.cc index 658caac6a50..48a8fbbc533 100644 --- a/mindspore/ccsrc/dataset/kernels/image/resize_bilinear_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_bilinear_op.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and 
* limitations under the License. */ -#include "dataset/kernels/image/resize_bilinear_op.h" +#include "minddata/dataset/kernels/image/resize_bilinear_op.h" #include -#include "dataset/util/status.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/resize_bilinear_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_bilinear_op.h similarity index 88% rename from mindspore/ccsrc/dataset/kernels/image/resize_bilinear_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/resize_bilinear_op.h index c8c2a5185b6..fd8f940946e 100644 --- a/mindspore/ccsrc/dataset/kernels/image/resize_bilinear_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_bilinear_op.h @@ -20,10 +20,10 @@ #include #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/image/resize_op.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/image/resize_op.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -51,6 +51,8 @@ class ResizeBilinearOp : public ResizeOp { // Name: Print() // Description: A function that prints info about the node void Print(std::ostream &out) const override; + + std::string Name() const override { return kResizeBilinearOp; } }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/resize_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.cc similarity index 94% rename from mindspore/ccsrc/dataset/kernels/image/resize_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.cc index 7c0252188e2..7456f50f324 100644 --- a/mindspore/ccsrc/dataset/kernels/image/resize_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.cc @@ -13,10 +13,10 @@ * See the License for the 
specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/image/resize_op.h" +#include "minddata/dataset/kernels/image/resize_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/resize_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.h similarity index 86% rename from mindspore/ccsrc/dataset/kernels/image/resize_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.h index 5a35a6076c4..3f847243ff2 100644 --- a/mindspore/ccsrc/dataset/kernels/image/resize_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.h @@ -18,11 +18,12 @@ #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -43,6 +44,10 @@ class ResizeOp : public TensorOp { explicit ResizeOp(int32_t size1, int32_t size2 = kDefWidth, InterpolationMode mInterpolation = kDefInterpolation) : size1_(size1), size2_(size2), interpolation_(mInterpolation) {} + ResizeOp(const ResizeOp &rhs) = default; + + ResizeOp(ResizeOp &&rhs) = default; + ~ResizeOp() override = default; void Print(std::ostream &out) const override { out << "ResizeOp: " << size1_ << " " << size2_; } @@ -50,6 +55,8 @@ class ResizeOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; Status OutputShape(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return kResizeOp; } + 
protected: int32_t size1_; int32_t size2_; diff --git a/mindspore/ccsrc/dataset/kernels/image/resize_with_bbox_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_with_bbox_op.cc similarity index 80% rename from mindspore/ccsrc/dataset/kernels/image/resize_with_bbox_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/resize_with_bbox_op.cc index 8a633d5678c..9df2d8a25ee 100644 --- a/mindspore/ccsrc/dataset/kernels/image/resize_with_bbox_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_with_bbox_op.cc @@ -14,16 +14,16 @@ * limitations under the License. */ -#include "dataset/kernels/image/resize_with_bbox_op.h" +#include "minddata/dataset/kernels/image/resize_with_bbox_op.h" #include #include -#include "dataset/kernels/image/resize_op.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/core/pybind_support.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/image/resize_op.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/core/pybind_support.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/resize_with_bbox_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_with_bbox_op.h similarity index 81% rename from mindspore/ccsrc/dataset/kernels/image/resize_with_bbox_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/resize_with_bbox_op.h index 17bdd01ef12..d2b5c96bf37 100644 --- a/mindspore/ccsrc/dataset/kernels/image/resize_with_bbox_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_with_bbox_op.h @@ -16,11 +16,12 @@ #ifndef DATASET_KERNELS_IMAGE_RESIZE_WITH_BBOX_OP_H #define 
DATASET_KERNELS_IMAGE_RESIZE_WITH_BBOX_OP_H -#include "dataset/core/tensor.h" -#include "dataset/kernels/image/image_utils.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" -#include "dataset/kernels/image/resize_op.h" +#include +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/image/image_utils.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/kernels/image/resize_op.h" namespace mindspore { namespace dataset { @@ -36,6 +37,8 @@ class ResizeWithBBoxOp : public ResizeOp { void Print(std::ostream &out) const override { out << "ResizeWithBBoxOp: " << size1_ << " " << size2_; } Status Compute(const TensorRow &input, TensorRow *output) override; + + std::string Name() const override { return kResizeWithBBoxOp; } }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.cc similarity index 95% rename from mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.cc index 7889b3b157c..95d75af0f2d 100644 --- a/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ #include -#include "dataset/kernels/image/uniform_aug_op.h" -#include "dataset/util/random.h" +#include "minddata/dataset/kernels/image/uniform_aug_op.h" +#include "minddata/dataset/util/random.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.h similarity index 90% rename from mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.h index 824898ba2df..0ae0fda92bd 100644 --- a/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.h @@ -21,9 +21,9 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -46,6 +46,8 @@ class UniformAugOp : public TensorOp { // @return Status - The error code return Status Compute(const TensorRow &input, TensorRow *output) override; + std::string Name() const override { return kUniformAugOp; } + private: int32_t num_ops_; std::vector> tensor_op_list_; diff --git a/mindspore/ccsrc/dataset/kernels/no_op.h b/mindspore/ccsrc/minddata/dataset/kernels/no_op.h similarity index 86% rename from mindspore/ccsrc/dataset/kernels/no_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/no_op.h index bfbdf43b366..f5a6a58f2bb 100644 --- a/mindspore/ccsrc/dataset/kernels/no_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/no_op.h @@ -17,9 +17,10 @@ #define DATASET_KERNELS_NO_OP_H_ #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { @@ -31,6 +32,8 @@ class NoOp 
: public TensorOp { } void Print(std::ostream &out) const override { out << "NoOp"; }; + + std::string Name() const override { return kNoOp; } }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/py_func_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/py_func_op.cc similarity index 94% rename from mindspore/ccsrc/dataset/kernels/py_func_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/py_func_op.cc index 0a6a1452b58..f501dd4b4f0 100644 --- a/mindspore/ccsrc/dataset/kernels/py_func_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/py_func_op.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/py_func_op.h" +#include "minddata/dataset/kernels/py_func_op.h" #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/py_func_op.h b/mindspore/ccsrc/minddata/dataset/kernels/py_func_op.h similarity index 88% rename from mindspore/ccsrc/dataset/kernels/py_func_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/py_func_op.h index a50aceafbbd..75d222b433b 100644 --- a/mindspore/ccsrc/dataset/kernels/py_func_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/py_func_op.h @@ -20,9 +20,10 @@ #include #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" namespace mindspore { namespace dataset { @@ -38,6 +39,8 @@ class __attribute__((visibility("hidden"))) PyFuncOp : public TensorOp { // Compute function for n-n mapping. 
Status Compute(const TensorRow &input, TensorRow *output) override; + std::string Name() const override { return kPyFuncOp; } + private: py::function py_func_ptr_; }; diff --git a/mindspore/ccsrc/dataset/kernels/tensor_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.cc similarity index 98% rename from mindspore/ccsrc/dataset/kernels/tensor_op.cc rename to mindspore/ccsrc/minddata/dataset/kernels/tensor_op.cc index 92aef8dc9ef..b625e3b532c 100644 --- a/mindspore/ccsrc/dataset/kernels/tensor_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/kernels/tensor_op.h" #include #include #include diff --git a/mindspore/ccsrc/dataset/kernels/tensor_op.h b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h similarity index 65% rename from mindspore/ccsrc/dataset/kernels/tensor_op.h rename to mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h index 9aae50d6b0b..3bcba4b4630 100644 --- a/mindspore/ccsrc/dataset/kernels/tensor_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h @@ -20,9 +20,9 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_row.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_row.h" +#include "minddata/dataset/util/status.h" #define IO_CHECK(input, output) \ do { \ @@ -62,14 +62,16 @@ uint32_t img_h = input[0]->shape()[0]; \ uint32_t img_w = input[0]->shape()[1]; \ for (uint32_t i = 0; i < num_of_boxes; i++) { \ - uint32_t min_x = 0; \ - uint32_t min_y = 0; \ - uint32_t b_w = 0; \ - uint32_t b_h = 0; \ - input[1]->GetItemAt(&min_x, {i, 0}); \ - input[1]->GetItemAt(&min_y, {i, 1}); \ - input[1]->GetItemAt(&b_w, {i, 2}); \ - input[1]->GetItemAt(&b_h, {i, 3}); \ + float min_x = 0.0, min_y = 0.0, b_w = 0.0, b_h = 0.0; \ + bool 
passing_data_fetch = true; \ + passing_data_fetch &= input[1]->GetItemAt(&min_x, {i, 0}).IsOk(); \ + passing_data_fetch &= input[1]->GetItemAt(&min_y, {i, 1}).IsOk(); \ + passing_data_fetch &= input[1]->GetItemAt(&b_w, {i, 2}).IsOk(); \ + passing_data_fetch &= input[1]->GetItemAt(&b_h, {i, 3}).IsOk(); \ + if (!passing_data_fetch) { \ + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, \ + "Fetching BBox values failed in BOUNDING_BOX_CHECK."); \ + } \ if ((min_x + b_w > img_w) || (min_y + b_h > img_h)) { \ return Status(StatusCode::kBoundingBoxOutOfBounds, __LINE__, __FILE__, \ "At least one of the bounding boxes is out of bounds of the image."); \ @@ -83,6 +85,66 @@ namespace mindspore { namespace dataset { + +// image +constexpr char kBoundingBoxAugmentOp[] = "BoundingBoxAugmentOp"; +constexpr char kDecodeOp[] = "DecodeOp"; +constexpr char kCenterCropOp[] = "CenterCropOp"; +constexpr char kCutOutOp[] = "CutOutOp"; +constexpr char kHwcToChwOp[] = "HwcToChwOp"; +constexpr char kNormalizeOp[] = "NormalizeOp"; +constexpr char kPadOp[] = "PadOp"; +constexpr char kRandomColorAdjustOp[] = "RandomColorAdjustOp"; +constexpr char kRandomCropAndResizeOp[] = "RandomCropAndResizeOp"; +constexpr char kRandomCropAndResizeWithBBoxOp[] = "RandomCropAndResizeWithBBoxOp"; +constexpr char kRandomCropDecodeResizeOp[] = "RandomCropDecodeResizeOp"; +constexpr char kRandomCropOp[] = "RandomCropOp"; +constexpr char kRandomCropWithBBoxOp[] = "RandomCropWithBBoxOp"; +constexpr char kRandomHorizontalFlipWithBBoxOp[] = "RandomHorizontalFlipWithBBoxOp"; +constexpr char kRandomHorizontalFlipOp[] = "RandomHorizontalFlipOp"; +constexpr char kRandomResizeOp[] = "RandomResizeOp"; +constexpr char kRandomResizeWithBBoxOp[] = "RandomResizeWithBBoxOp"; +constexpr char kRandomRotationOp[] = "RandomRotationOp"; +constexpr char kRandomVerticalFlipOp[] = "RandomVerticalFlipOp"; +constexpr char kRandomVerticalFlipWithBBoxOp[] = "RandomVerticalFlipWithBBoxOp"; +constexpr char kRescaleOp[] = 
"RescaleOp"; +constexpr char kResizeBilinearOp[] = "ResizeBilinearOp"; +constexpr char kResizeOp[] = "ResizeOp"; +constexpr char kResizeWithBBoxOp[] = "ResizeWithBBoxOp"; +constexpr char kUniformAugOp[] = "UniformAugOp"; + +// text +constexpr char kBasicTokenizerOp[] = "BasicTokenizerOp"; +constexpr char kBertTokenizerOp[] = "BertTokenizerOp"; +constexpr char kCaseFoldOp[] = "CaseFoldOp"; +constexpr char kJiebaTokenizerOp[] = "JiebaTokenizerOp"; +constexpr char kLookupOp[] = "LookupOp"; +constexpr char kNgramOp[] = "NgramOp"; +constexpr char kNormalizeUTF8Op[] = "NormalizeUTF8Op"; +constexpr char kRegexReplaceOp[] = "RegexReplaceOp"; +constexpr char kRegexTokenizerOp[] = "RegexTokenizerOp"; +constexpr char kToNumberOp[] = "ToNumberOp"; +constexpr char kTruncateSequencePairOp[] = "TruncateSequencePairOp"; +constexpr char kUnicodeCharTokenizerOp[] = "UnicodeCharTokenizerOp"; +constexpr char kUnicodeScriptTokenizerOp[] = "UnicodeScriptTokenizerOp"; +constexpr char kWhitespaceTokenizerOp[] = "WhitespaceTokenizerOp"; +constexpr char kWordpieceTokenizerOp[] = "WordpieceTokenizerOp"; + +// data +constexpr char kConcatenateOp[] = "kConcatenateOp"; +constexpr char kDuplicateOp[] = "DuplicateOp"; +constexpr char kFillOp[] = "FillOp"; +constexpr char kMaskOp[] = "MaskOp"; +constexpr char kOneHotOp[] = "OneHotOp"; +constexpr char kPadEndOp[] = "PadEndOp"; +constexpr char kSliceOp[] = "SliceOp"; +constexpr char kToFloat16Op[] = "ToFloat16Op"; +constexpr char kTypeCastOp[] = "TypeCastOp"; + +// other +constexpr char kPyFuncOp[] = "PyFuncOp"; +constexpr char kNoOp[] = "NoOp"; + // A class that does a computation on a Tensor class TensorOp { public: @@ -141,6 +203,8 @@ class TensorOp { // @param outputs out: vector of the types of the output tensors to be filled. 
// @return Status virtual Status OutputType(const std::vector &inputs, std::vector &outputs); + + virtual std::string Name() const = 0; }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/text/CMakeLists.txt similarity index 100% rename from mindspore/ccsrc/dataset/text/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/text/CMakeLists.txt diff --git a/mindspore/ccsrc/dataset/text/kernels/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/text/kernels/CMakeLists.txt similarity index 100% rename from mindspore/ccsrc/dataset/text/kernels/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/text/kernels/CMakeLists.txt diff --git a/mindspore/ccsrc/dataset/text/kernels/basic_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.cc similarity index 84% rename from mindspore/ccsrc/dataset/text/kernels/basic_tokenizer_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.cc index 3512a4b2d71..6195572944e 100644 --- a/mindspore/ccsrc/dataset/text/kernels/basic_tokenizer_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/text/kernels/basic_tokenizer_op.h" +#include "minddata/dataset/text/kernels/basic_tokenizer_op.h" #include #include #include @@ -27,10 +27,12 @@ namespace mindspore { namespace dataset { + const bool BasicTokenizerOp::kDefLowerCase = false; const bool BasicTokenizerOp::kDefKeepWhitespace = false; const NormalizeForm BasicTokenizerOp::kDefNormalizationForm = NormalizeForm::kNone; const bool BasicTokenizerOp::kDefPreserveUnusedToken = true; +const bool BasicTokenizerOp::kDefWithOffsets = false; const char BasicTokenizerOp::kCommonPattern[] = "[!-/]" "|[:-@]" @@ -47,11 +49,14 @@ const char BasicTokenizerOp::kCommonPattern[] = "|[\\x{2F800}-\\x{2FA1F}]"; const char BasicTokenizerOp::kUnusedPattern[] = "\\[CLS\\]|\\[SEP\\]|\\[UNK\\]|\\[PAD\\]|\\[MASK\\]|\\[unused\\d+\\]|"; const std::unordered_set BasicTokenizerOp::kUnusedWords{"[CLS]", "[SEP]", "[UNK]", "[PAD]", "[MASK]"}; -BasicTokenizerOp::BasicTokenizerOp(bool lower_case, bool keep_whitespace, NormalizeForm normalization_form, - bool preserve_unused_token) + +BasicTokenizerOp::BasicTokenizerOp(const bool &lower_case, const bool &keep_whitespace, + const NormalizeForm &normalization_form, const bool &preserve_unused_token, + const bool &with_offsets) : lower_case_(lower_case), keep_whitespace_(keep_whitespace), preserve_unused_token_(preserve_unused_token), + with_offsets_(with_offsets), case_fold_(std::make_unique()), nfd_normalize_(std::make_unique(NormalizeForm::kNfd)), normalization_form_(normalization_form), @@ -69,7 +74,7 @@ BasicTokenizerOp::BasicTokenizerOp(bool lower_case, bool keep_whitespace, Normal keep_delim_pattern = kUnusedPattern + keep_delim_pattern; delim_pattern = kUnusedPattern + delim_pattern; } - regex_tokenizer_ = std::make_unique(delim_pattern, keep_delim_pattern); + regex_tokenizer_ = std::make_unique(delim_pattern, keep_delim_pattern, with_offsets_); } Status BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::string_view &text, @@ -135,9 +140,10 @@ Status 
BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::shared_ptr &input, std::shared_ptr *output) { - IO_CHECK(input, output); - if (input->Rank() != 0 || input->type() != DataType::DE_STRING) { +Status BasicTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor"); + if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) { RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor"); } std::shared_ptr cur_input; @@ -145,10 +151,10 @@ Status BasicTokenizerOp::Compute(const std::shared_ptr &input, std::shar if (lower_case_) { if (!preserve_unused_token_) { // to lower case - RETURN_IF_NOT_OK(case_fold_->Compute(input, &processed_tensor)); + RETURN_IF_NOT_OK(case_fold_->Compute(input[0], &processed_tensor)); } else { // to lower case except words in kUnusedWords - RETURN_IF_NOT_OK(CaseFoldWithoutUnusedWords(input, &processed_tensor)); + RETURN_IF_NOT_OK(CaseFoldWithoutUnusedWords(input[0], &processed_tensor)); } cur_input = processed_tensor; // strip accent characters @@ -156,12 +162,12 @@ Status BasicTokenizerOp::Compute(const std::shared_ptr &input, std::shar cur_input = processed_tensor; RETURN_IF_NOT_OK(replace_accent_chars_->Compute(cur_input, &processed_tensor)); } else { - RETURN_IF_NOT_OK(common_normalize_->Compute(input, &processed_tensor)); + RETURN_IF_NOT_OK(common_normalize_->Compute(input[0], &processed_tensor)); } // strip control characters cur_input = processed_tensor; RETURN_IF_NOT_OK(replace_control_chars_->Compute(cur_input, &processed_tensor)); - return regex_tokenizer_->Compute(processed_tensor, output); + return regex_tokenizer_->Compute(TensorRow(0, {std::move(processed_tensor)}), output); } } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/basic_tokenizer_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.h similarity index 
69% rename from mindspore/ccsrc/dataset/text/kernels/basic_tokenizer_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.h index 01827a0ba4c..cbc21273c2d 100644 --- a/mindspore/ccsrc/dataset/text/kernels/basic_tokenizer_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.h @@ -19,13 +19,13 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/text/kernels/case_fold_op.h" -#include "dataset/text/kernels/normalize_utf8_op.h" -#include "dataset/text/kernels/regex_replace_op.h" -#include "dataset/text/kernels/regex_tokenizer_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/text/kernels/case_fold_op.h" +#include "minddata/dataset/text/kernels/normalize_utf8_op.h" +#include "minddata/dataset/text/kernels/regex_replace_op.h" +#include "minddata/dataset/text/kernels/regex_tokenizer_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -36,25 +36,31 @@ class BasicTokenizerOp : public TensorOp { static const bool kDefKeepWhitespace; static const NormalizeForm kDefNormalizationForm; static const bool kDefPreserveUnusedToken; - explicit BasicTokenizerOp(bool lower_case = kDefLowerCase, bool keep_whitespace = kDefKeepWhitespace, - NormalizeForm normalization_form = kDefNormalizationForm, - bool preserve_unused_token = kDefPreserveUnusedToken); + static const bool kDefWithOffsets; + + explicit BasicTokenizerOp(const bool &lower_case = kDefLowerCase, const bool &keep_whitespace = kDefKeepWhitespace, + const NormalizeForm &normalization_form = kDefNormalizationForm, + const bool &preserve_unused_token = kDefPreserveUnusedToken, + const bool &with_offsets = kDefWithOffsets); ~BasicTokenizerOp() override = default; void Print(std::ostream &out) const override { out << "BasicTokenizerOp"; } - Status Compute(const 
std::shared_ptr &input, std::shared_ptr *output) override; + Status Compute(const TensorRow &input, TensorRow *output) override; protected: Status CaseFoldWithoutUnusedWords(const std::string_view &text, const std::unordered_set &unused_words, std::string *outupt); Status CaseFoldWithoutUnusedWords(const std::shared_ptr &input, std::shared_ptr *output); + std::string Name() const override { return kBasicTokenizerOp; } + private: static const char kCommonPattern[]; static const char kUnusedPattern[]; static const std::unordered_set kUnusedWords; + bool with_offsets_; bool lower_case_; bool keep_whitespace_; NormalizeForm normalization_form_; diff --git a/mindspore/ccsrc/dataset/text/kernels/bert_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/bert_tokenizer_op.cc similarity index 79% rename from mindspore/ccsrc/dataset/text/kernels/bert_tokenizer_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/bert_tokenizer_op.cc index 2b68a5accb6..631597ba24b 100644 --- a/mindspore/ccsrc/dataset/text/kernels/bert_tokenizer_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/bert_tokenizer_op.cc @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/text/kernels/bert_tokenizer_op.h" +#include "minddata/dataset/text/kernels/bert_tokenizer_op.h" namespace mindspore { namespace dataset { -Status BertTokenizerOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { - IO_CHECK(input, output); - std::shared_ptr basic_tensor; +Status BertTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + TensorRow basic_tensor; RETURN_IF_NOT_OK(basic_tokenizer_.Compute(input, &basic_tensor)); RETURN_IF_NOT_OK(wordpiece_tokenizer_.Compute(basic_tensor, output)); return Status::OK(); diff --git a/mindspore/ccsrc/dataset/text/kernels/bert_tokenizer_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/bert_tokenizer_op.h similarity index 63% rename from mindspore/ccsrc/dataset/text/kernels/bert_tokenizer_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/bert_tokenizer_op.h index 660fdc7ba58..b281903349c 100644 --- a/mindspore/ccsrc/dataset/text/kernels/bert_tokenizer_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/bert_tokenizer_op.h @@ -18,11 +18,11 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/text/kernels/basic_tokenizer_op.h" -#include "dataset/text/kernels/wordpiece_tokenizer_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/text/kernels/basic_tokenizer_op.h" +#include "minddata/dataset/text/kernels/wordpiece_tokenizer_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -32,18 +32,21 @@ class BertTokenizerOp : public TensorOp { const std::string &suffix_indicator = WordpieceTokenizerOp::kDefSuffixIndicator, const int &max_bytes_per_token = WordpieceTokenizerOp::kDefMaxBytesPerToken, const std::string &unknown_token = WordpieceTokenizerOp::kDefUnknownToken, - bool lower_case = BasicTokenizerOp::kDefLowerCase, - 
bool keep_whitespace = BasicTokenizerOp::kDefKeepWhitespace, - NormalizeForm normalization_form = BasicTokenizerOp::kDefNormalizationForm, - bool preserve_unused_token = BasicTokenizerOp::kDefPreserveUnusedToken) - : wordpiece_tokenizer_(vocab, suffix_indicator, max_bytes_per_token, unknown_token), - basic_tokenizer_(lower_case, keep_whitespace, normalization_form, preserve_unused_token) {} + const bool &lower_case = BasicTokenizerOp::kDefLowerCase, + const bool &keep_whitespace = BasicTokenizerOp::kDefKeepWhitespace, + const NormalizeForm &normalization_form = BasicTokenizerOp::kDefNormalizationForm, + const bool &preserve_unused_token = BasicTokenizerOp::kDefPreserveUnusedToken, + const bool &with_offsets = WordpieceTokenizerOp::kDefWithOffsets) + : wordpiece_tokenizer_(vocab, suffix_indicator, max_bytes_per_token, unknown_token, with_offsets), + basic_tokenizer_(lower_case, keep_whitespace, normalization_form, preserve_unused_token, with_offsets) {} ~BertTokenizerOp() override = default; void Print(std::ostream &out) const override { out << "BertTokenizerOp"; } - Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + Status Compute(const TensorRow &input, TensorRow *output) override; + + std::string Name() const override { return kBertTokenizerOp; } private: WordpieceTokenizerOp wordpiece_tokenizer_; diff --git a/mindspore/ccsrc/dataset/text/kernels/case_fold_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/text/kernels/case_fold_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.cc index d935608efda..0ea5cadedb6 100644 --- a/mindspore/ccsrc/dataset/text/kernels/case_fold_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/text/kernels/case_fold_op.h" +#include "minddata/dataset/text/kernels/case_fold_op.h" #include #include #include diff --git a/mindspore/ccsrc/dataset/text/kernels/case_fold_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.h similarity index 84% rename from mindspore/ccsrc/dataset/text/kernels/case_fold_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.h index d1b5ba53f1e..f7a21052692 100644 --- a/mindspore/ccsrc/dataset/text/kernels/case_fold_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.h @@ -16,10 +16,11 @@ #ifndef DATASET_TEXT_KERNELS_CASE_FOLD_OP_H_ #define DATASET_TEXT_KERNELS_CASE_FOLD_OP_H_ #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -33,6 +34,8 @@ class CaseFoldOp : public TensorOp { void Print(std::ostream &out) const override { out << "CaseFoldOp"; } Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + + std::string Name() const override { return kCaseFoldOp; } }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/text/kernels/jieba_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/jieba_tokenizer_op.cc new file mode 100644 index 00000000000..0a1ae92d144 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/jieba_tokenizer_op.cc @@ -0,0 +1,94 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "minddata/dataset/text/kernels/jieba_tokenizer_op.h" + +#include +#include +#include +#include "minddata/dataset/util/path.h" + +namespace mindspore { +namespace dataset { + +const bool JiebaTokenizerOp::kDefWithOffsets = false; + +JiebaTokenizerOp::JiebaTokenizerOp(const std::string &hmm_path, const std::string &dict_path, const JiebaMode &mode, + const bool &with_offsets) + : jieba_mode_(mode), hmm_model_path_(hmm_path), mp_dict_path_(dict_path), with_offsets_(with_offsets) { + jieba_parser_ = std::make_unique(mp_dict_path_, hmm_model_path_, ""); +} + +Status JiebaTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor"); + RETURN_UNEXPECTED_IF_NULL(jieba_parser_); + + if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) { + RETURN_STATUS_UNEXPECTED("the input tensor should be scalar string tensor"); + } + + std::string_view sentence_v; + RETURN_IF_NOT_OK(input[0]->GetItemAt(&sentence_v, {})); + std::string sentence{sentence_v}; + std::vector words; + std::vector offsets_start, offsets_limit; + std::shared_ptr token_tensor, offsets_start_tensor, offsets_limit_tensor; + if (sentence == "") { + words.push_back(""); + } else { + std::vector tmp; + if (jieba_mode_ == JiebaMode::kMp) { + std::unique_ptr mp_seg = std::make_unique(jieba_parser_->GetDictTrie()); + mp_seg->Cut(sentence, tmp, MAX_WORD_LENGTH); + } else if (jieba_mode_ == JiebaMode::kHmm) { + std::unique_ptr hmm_seg = + 
std::make_unique(jieba_parser_->GetHMMModel()); + hmm_seg->Cut(sentence, tmp); + } else { // Mix + std::unique_ptr mix_seg = + std::make_unique(jieba_parser_->GetDictTrie(), jieba_parser_->GetHMMModel()); + mix_seg->Cut(sentence, tmp, true); + } + GetStringsFromWords(tmp, words); + for (auto item : tmp) { + offsets_start.push_back(static_cast(item.offset)); + offsets_limit.push_back(static_cast(item.offset + item.word.length())); + } + } + token_tensor = std::make_shared(words, TensorShape({(dsize_t)words.size()})); + output->push_back(token_tensor); + if (with_offsets_) { + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_start[0]))); + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_limit[0]))); + output->push_back(offsets_start_tensor); + output->push_back(offsets_limit_tensor); + } + return Status::OK(); +} + +Status JiebaTokenizerOp::AddWord(const std::string &word, int freq) { + RETURN_UNEXPECTED_IF_NULL(jieba_parser_); + if (jieba_parser_->InsertUserWord(word, freq, "") == false) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "add word error"); + } + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/jieba_tokenizer_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/jieba_tokenizer_op.h similarity index 77% rename from mindspore/ccsrc/dataset/text/kernels/jieba_tokenizer_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/jieba_tokenizer_op.h index 41736e4fdb8..4e49891c003 100644 --- a/mindspore/ccsrc/dataset/text/kernels/jieba_tokenizer_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/jieba_tokenizer_op.h @@ -20,8 +20,8 @@ #include #include "cppjieba/Jieba.hpp" 
-#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -30,15 +30,19 @@ enum class JiebaMode { kMix = 0, kMp = 1, kHmm = 2 }; class JiebaTokenizerOp : public TensorOp { public: - // deffault constant for Jieba MPSegment algorithm. + // default constant for Jieba MPSegment algorithm. static constexpr size_t MAX_WORD_LENGTH = 512; + // default const for set whether Jieba output offsets tensor. + static const bool kDefWithOffsets; // Constructor for JiebaTokenizerOp. // @param hmm_path HMM model file. // @param mp_path MP model file. // @mode tokenization mode [Default "MIX"], "MP" model will tokenize with MPSegment algorithm, "HMM" mode will // tokenize with Hiddel Markov Model Segment algorithm, "MIx" model will tokenize with a mix of MPSegment and // HMMSegment algorithm. - JiebaTokenizerOp(const std::string &hmm_path, const std::string &mp_path, JiebaMode mode = JiebaMode::kMix); + // @with_offsets user set this value to choose whether output offset tensor. + JiebaTokenizerOp(const std::string &hmm_path, const std::string &mp_path, const JiebaMode &mode = JiebaMode::kMix, + const bool &with_offsets = kDefWithOffsets); ~JiebaTokenizerOp() override = default; void Print(std::ostream &out) const override { @@ -46,18 +50,21 @@ class JiebaTokenizerOp : public TensorOp { << mp_dict_path_; } - Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + Status Compute(const TensorRow &input, TensorRow *output) override; // @word the word to be added to the JiebaTokenizer. // @freq [Default 0] the frequency fo the word to be added. // @tag [Default ""] the tag of the word to be added. 
Status AddWord(const std::string &word, int freq = 0); + std::string Name() const override { return kJiebaTokenizerOp; } + protected: std::string hmm_model_path_; std::string mp_dict_path_; std::unique_ptr jieba_parser_; JiebaMode jieba_mode_; + bool with_offsets_; }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/lookup_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/lookup_op.cc similarity index 82% rename from mindspore/ccsrc/dataset/text/kernels/lookup_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/lookup_op.cc index 07cf7aef5c0..02b75bc4f9b 100644 --- a/mindspore/ccsrc/dataset/text/kernels/lookup_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/lookup_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/text/kernels/lookup_op.h" +#include "minddata/dataset/text/kernels/lookup_op.h" #include @@ -26,11 +26,15 @@ LookupOp::LookupOp(std::shared_ptr vocab, WordIdType default_id) Status LookupOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { IO_CHECK(input, output); RETURN_UNEXPECTED_IF_NULL(vocab_); - CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "None String Tensor"); + CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "None String Tensor."); std::vector word_ids; word_ids.reserve(input->Size()); for (auto itr = input->begin(); itr != input->end(); itr++) { - word_ids.push_back(vocab_->Lookup(std::string(*itr), default_id_)); + WordIdType word_id = vocab_->Lookup(std::string(*itr)); + word_ids.emplace_back(word_id == Vocab::kNoTokenExists ? 
default_id_ : word_id); + CHECK_FAIL_RETURN_UNEXPECTED( + word_ids.back() != Vocab::kNoTokenExists, + "Lookup Error: token" + std::string(*itr) + "doesn't exist in vocab and no unknown token is specified."); } RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, input->shape(), type_, diff --git a/mindspore/ccsrc/dataset/text/kernels/lookup_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/lookup_op.h similarity index 88% rename from mindspore/ccsrc/dataset/text/kernels/lookup_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/lookup_op.h index dad99c32419..4efc64321bb 100644 --- a/mindspore/ccsrc/dataset/text/kernels/lookup_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/lookup_op.h @@ -20,11 +20,12 @@ #include #include #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" -#include "dataset/text/vocab.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/text/vocab.h" namespace mindspore { namespace dataset { @@ -52,6 +53,8 @@ class LookupOp : public TensorOp { // @return error code Status OutputType(const std::vector &inputs, std::vector &outputs) override; + std::string Name() const override { return kLookupOp; } + private: std::shared_ptr vocab_; WordIdType default_id_; diff --git a/mindspore/ccsrc/dataset/text/kernels/ngram_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.cc similarity index 98% rename from mindspore/ccsrc/dataset/text/kernels/ngram_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.cc index bbe449a89a1..36781b9b4d6 100644 --- a/mindspore/ccsrc/dataset/text/kernels/ngram_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "dataset/text/kernels/ngram_op.h" +#include "minddata/dataset/text/kernels/ngram_op.h" #include #include diff --git a/mindspore/ccsrc/dataset/text/kernels/ngram_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.h similarity index 93% rename from mindspore/ccsrc/dataset/text/kernels/ngram_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.h index 3d2c547f793..6ce3881638e 100644 --- a/mindspore/ccsrc/dataset/text/kernels/ngram_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.h @@ -21,13 +21,12 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { -namespace py = pybind11; class NgramOp : public TensorOp { public: @@ -59,6 +58,8 @@ class NgramOp : public TensorOp { // @param std::ostream &out void Print(std::ostream &out) const override; + std::string Name() const override { return kNgramOp; } + private: std::vector ngrams_; // list of n grams int32_t l_len_; // left padding length diff --git a/mindspore/ccsrc/dataset/text/kernels/normalize_utf8_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/normalize_utf8_op.cc similarity index 97% rename from mindspore/ccsrc/dataset/text/kernels/normalize_utf8_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/normalize_utf8_op.cc index b9022865764..0c0aa5fa2da 100644 --- a/mindspore/ccsrc/dataset/text/kernels/normalize_utf8_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/normalize_utf8_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/text/kernels/normalize_utf8_op.h" +#include "minddata/dataset/text/kernels/normalize_utf8_op.h" #include #include #include diff --git a/mindspore/ccsrc/dataset/text/kernels/normalize_utf8_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/normalize_utf8_op.h similarity index 86% rename from mindspore/ccsrc/dataset/text/kernels/normalize_utf8_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/normalize_utf8_op.h index 5033f2355fb..f914be1c584 100644 --- a/mindspore/ccsrc/dataset/text/kernels/normalize_utf8_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/normalize_utf8_op.h @@ -16,10 +16,11 @@ #ifndef DATASET_TEXT_KERNELS_NORMALIZE_UTF8_OP_H_ #define DATASET_TEXT_KERNELS_NORMALIZE_UTF8_OP_H_ #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -42,6 +43,8 @@ class NormalizeUTF8Op : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + std::string Name() const override { return kNormalizeUTF8Op; } + private: NormalizeForm normalize_form_; }; diff --git a/mindspore/ccsrc/dataset/text/kernels/regex_replace_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/regex_replace_op.cc similarity index 97% rename from mindspore/ccsrc/dataset/text/kernels/regex_replace_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/regex_replace_op.cc index 1ce2c5ea61e..c370393e768 100644 --- a/mindspore/ccsrc/dataset/text/kernels/regex_replace_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/regex_replace_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/text/kernels/regex_replace_op.h" +#include "minddata/dataset/text/kernels/regex_replace_op.h" #include #include #include diff --git a/mindspore/ccsrc/dataset/text/kernels/regex_replace_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/regex_replace_op.h similarity index 89% rename from mindspore/ccsrc/dataset/text/kernels/regex_replace_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/regex_replace_op.h index 30fae132412..ac3d3f7ff0c 100644 --- a/mindspore/ccsrc/dataset/text/kernels/regex_replace_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/regex_replace_op.h @@ -22,9 +22,9 @@ #include "unicode/errorcode.h" #include "unicode/utypes.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -42,6 +42,8 @@ class RegexReplaceOp : public TensorOp { Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + std::string Name() const override { return kRegexReplaceOp; } + protected: Status RegexReplace(icu::RegexMatcher *const matcher, const std::string_view &text, std::string *out) const; diff --git a/mindspore/ccsrc/dataset/text/kernels/regex_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/regex_tokenizer_op.cc similarity index 55% rename from mindspore/ccsrc/dataset/text/kernels/regex_tokenizer_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/regex_tokenizer_op.cc index 34c06f28ea3..7ff1d994bed 100644 --- a/mindspore/ccsrc/dataset/text/kernels/regex_tokenizer_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/regex_tokenizer_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/text/kernels/regex_tokenizer_op.h" +#include "minddata/dataset/text/kernels/regex_tokenizer_op.h" #include #include #include @@ -22,8 +22,11 @@ namespace mindspore { namespace dataset { -Status RegexTokenizerOp::GetUnicodeSubstr(const icu::UnicodeString &input, int start, int len, std::string *out_utf8, - icu::UnicodeString *out_unicode) const { + +const bool RegexTokenizerOp::kDefWithOffsets = false; + +Status RegexTokenizerOp::GetUnicodeSubstr(const icu::UnicodeString &input, const int &start, const int &len, + std::string *out_utf8, icu::UnicodeString *out_unicode) const { CHECK_FAIL_RETURN_UNEXPECTED((out_utf8 != nullptr || out_unicode != nullptr), "Wrong input"); int total_len = input.length(); int end = start + len; @@ -39,7 +42,9 @@ Status RegexTokenizerOp::GetUnicodeSubstr(const icu::UnicodeString &input, int s return Status::OK(); } -Status RegexTokenizerOp::GetRegexTokens(const std::string &text, std::vector *out_tokens) const { +Status RegexTokenizerOp::GetRegexTokens(const std::string &text, std::vector *out_tokens, + std::vector *offsets_start, + std::vector *offsets_limit) const { UErrorCode status = U_ZERO_ERROR; out_tokens->clear(); icu::RegexMatcher token_matcher(delim_pattern_, 0, status); @@ -50,6 +55,7 @@ Status RegexTokenizerOp::GetRegexTokens(const std::string &text, std::vector 0) { std::string token; + uint32_t token_offset = 0; RETURN_IF_NOT_OK(GetUnicodeSubstr(utext, token_start_index, token_len, &token)); + token_offset = token.length(); out_tokens->emplace_back(std::move(token)); + offsets_start->push_back(static_cast(text_start_index)); + offsets_limit->push_back(static_cast(text_start_index + token_offset)); + text_start_index += token_offset; } int delim_len = deli_end_index - deli_start_index; - if (keep_delim_ && delim_len > 0) { + if (delim_len > 0) { icu::UnicodeString delim_str; std::string delim_utf8_str; + uint32_t delim_str_offset = 0; RETURN_IF_NOT_OK(GetUnicodeSubstr(utext, deli_start_index, delim_len, 
&delim_utf8_str, &delim_str)); delim_matcher.reset(delim_str); - if (delim_matcher.matches(status) && U_SUCCESS(status)) { + delim_str_offset = delim_utf8_str.length(); + if (keep_delim_ && delim_matcher.matches(status) && U_SUCCESS(status)) { out_tokens->emplace_back(std::move(delim_utf8_str)); + offsets_start->push_back(static_cast(text_start_index)); + offsets_limit->push_back(static_cast(text_start_index + delim_str_offset)); } + text_start_index += delim_str_offset; } token_start_index = deli_end_index; } if (token_start_index < utext.length()) { std::string temp; + uint32_t temp_offset = 0; RETURN_IF_NOT_OK(GetUnicodeSubstr(utext, token_start_index, utext.length() - token_start_index, &temp)); + temp_offset = temp.length(); out_tokens->emplace_back(std::move(temp)); + offsets_start->push_back(static_cast(text_start_index)); + offsets_limit->push_back(static_cast(text_start_index + temp_offset)); } return Status::OK(); } -Status RegexTokenizerOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { - IO_CHECK(input, output); - if (input->Rank() != 0 || input->type() != DataType::DE_STRING) { +Status RegexTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor"); + if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) { RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor"); } std::string_view text; - RETURN_IF_NOT_OK(input->GetItemAt(&text, {})); std::vector tokens; - RETURN_IF_NOT_OK(GetRegexTokens(std::string(text.data(), text.size()), &tokens)); - *output = std::make_shared(std::move(tokens), TensorShape({(dsize_t)tokens.size()})); + std::vector offsets_start; + std::vector offsets_limit; + std::shared_ptr token_tensor, offsets_start_tensor, offsets_limit_tensor; + RETURN_IF_NOT_OK(input[0]->GetItemAt(&text, {})); + RETURN_IF_NOT_OK(GetRegexTokens(std::string(text.data(), text.size()), 
&tokens, &offsets_start, &offsets_limit)); + token_tensor = std::make_shared(std::move(tokens), TensorShape({(dsize_t)tokens.size()})); + output->push_back(token_tensor); + if (with_offsets_) { + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_start[0]))); + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_limit[0]))); + output->push_back(offsets_start_tensor); + output->push_back(offsets_limit_tensor); + } return Status::OK(); } } // namespace dataset diff --git a/mindspore/ccsrc/dataset/text/kernels/regex_tokenizer_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/regex_tokenizer_op.h similarity index 70% rename from mindspore/ccsrc/dataset/text/kernels/regex_tokenizer_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/regex_tokenizer_op.h index bcf02a4a118..56271f95516 100644 --- a/mindspore/ccsrc/dataset/text/kernels/regex_tokenizer_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/regex_tokenizer_op.h @@ -23,34 +23,42 @@ #include "unicode/errorcode.h" #include "unicode/utypes.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { class RegexTokenizerOp : public TensorOp { public: - RegexTokenizerOp(const std::string &delim_pattern, const std::string &keep_delim_pattern) + static const bool kDefWithOffsets; + + RegexTokenizerOp(const std::string &delim_pattern, const std::string &keep_delim_pattern, + const bool &with_offsets = kDefWithOffsets) : delim_pattern_(icu::UnicodeString::fromUTF8(delim_pattern)), 
keep_delim_pattern_(icu::UnicodeString::fromUTF8(keep_delim_pattern)), + with_offsets_(with_offsets), keep_delim_(!keep_delim_pattern.empty()) {} ~RegexTokenizerOp() override = default; void Print(std::ostream &out) const override { out << "RegexTokenizerOp"; } - Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + Status Compute(const TensorRow &input, TensorRow *output) override; protected: - Status GetUnicodeSubstr(const icu::UnicodeString &input, int start, int len, std::string *out_utf8, + Status GetUnicodeSubstr(const icu::UnicodeString &input, const int &start, const int &len, std::string *out_utf8, icu::UnicodeString *out_unicode = nullptr) const; - Status GetRegexTokens(const std::string &text, std::vector *out_tokens) const; + Status GetRegexTokens(const std::string &text, std::vector *out_tokens, + std::vector *offsets_start, std::vector *offsets_limit) const; + + std::string Name() const override { return kRegexTokenizerOp; } private: const icu::UnicodeString delim_pattern_; const icu::UnicodeString keep_delim_pattern_; + bool with_offsets_; const bool keep_delim_; }; } // namespace dataset diff --git a/mindspore/ccsrc/dataset/text/kernels/to_number_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/to_number_op.cc similarity index 96% rename from mindspore/ccsrc/dataset/text/kernels/to_number_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/to_number_op.cc index 1368684daff..a6685a2d643 100644 --- a/mindspore/ccsrc/dataset/text/kernels/to_number_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/to_number_op.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "dataset/text/kernels/to_number_op.h" +#include "minddata/dataset/text/kernels/to_number_op.h" #include #include @@ -23,11 +23,11 @@ #include #include -#include "dataset/core/data_type.h" -#include "dataset/core/tensor.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/kernels/data/data_utils.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/kernels/data/data_utils.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/text/kernels/to_number_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/to_number_op.h similarity index 91% rename from mindspore/ccsrc/dataset/text/kernels/to_number_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/to_number_op.h index 1346ce2f474..8582fcf0736 100644 --- a/mindspore/ccsrc/dataset/text/kernels/to_number_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/to_number_op.h @@ -21,10 +21,10 @@ #include #include -#include "dataset/core/data_type.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -57,6 +57,8 @@ class ToNumberOp : public TensorOp { // @param std::ostream &out void Print(std::ostream &out) const override; + std::string Name() const override { return kToNumberOp; } + private: template Status ToSignedIntegral(const std::shared_ptr &input, std::shared_ptr *output); diff --git a/mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.cc similarity index 90% rename from 
mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.cc index 136d5006dfe..53a803c5424 100644 --- a/mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.cc @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "dataset/text/kernels/truncate_sequence_pair_op.h" +#include "minddata/dataset/text/kernels/truncate_sequence_pair_op.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/kernels/data/slice_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/kernels/data/slice_op.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.h similarity index 83% rename from mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.h index e8be6802a8b..ce827356450 100644 --- a/mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.h @@ -22,10 +22,10 @@ #include #include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/kernels/data/type_cast_op.h" -#include "dataset/kernels/data/data_utils.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/kernels/data/type_cast_op.h" +#include "minddata/dataset/kernels/data/data_utils.h" namespace mindspore { namespace dataset { @@ -40,6 +40,8 @@ class TruncateSequencePairOp : public TensorOp { Status Compute(const TensorRow &input, TensorRow *output) override; + std::string Name() const override { return 
kTruncateSequencePairOp; } + private: dsize_t max_length_; }; diff --git a/mindspore/ccsrc/minddata/dataset/text/kernels/unicode_char_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/unicode_char_tokenizer_op.cc new file mode 100644 index 00000000000..e08f61100b1 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/unicode_char_tokenizer_op.cc @@ -0,0 +1,73 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "minddata/dataset/text/kernels/unicode_char_tokenizer_op.h" +#include +#include +#include +#include + +#include "cppjieba/Unicode.hpp" + +using cppjieba::DecodeRunesInString; +using cppjieba::RuneStrArray; + +namespace mindspore { +namespace dataset { + +const bool UnicodeCharTokenizerOp::kDefWithOffsets = false; + +Status UnicodeCharTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor"); + if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) { + RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor"); + } + std::string_view str; + RETURN_IF_NOT_OK(input[0]->GetItemAt(&str, {})); + + RuneStrArray runes; + if (!DecodeRunesInString(str.data(), str.size(), runes)) { + RETURN_STATUS_UNEXPECTED("Decode utf8 string failed."); + } + std::shared_ptr token_tensor, offsets_start_tensor, offsets_limit_tensor; + std::vector splits(runes.size()); + std::vector offsets_start, offsets_limit; + for (size_t i = 0; i < runes.size(); i++) { + offsets_start.push_back(runes[i].offset); + offsets_limit.push_back(runes[i].offset + runes[i].len); + splits[i] = str.substr(runes[i].offset, runes[i].len); + } + if (splits.empty()) { + splits.emplace_back(""); + offsets_start.push_back(0); + offsets_limit.push_back(0); + } + token_tensor = std::make_shared(splits, TensorShape({(dsize_t)splits.size()})); + output->push_back(token_tensor); + if (with_offsets_) { + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_start[0]))); + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_limit[0]))); + output->push_back(offsets_start_tensor); + 
output->push_back(offsets_limit_tensor); + } + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/unicode_char_tokenizer_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/unicode_char_tokenizer_op.h similarity index 69% rename from mindspore/ccsrc/dataset/text/kernels/unicode_char_tokenizer_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/unicode_char_tokenizer_op.h index 01a84eca8ba..415d99b4511 100644 --- a/mindspore/ccsrc/dataset/text/kernels/unicode_char_tokenizer_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/unicode_char_tokenizer_op.h @@ -16,23 +16,31 @@ #ifndef DATASET_TEXT_KERNELS_UNICODE_CHAR_TOKENIZER_OP_H_ #define DATASET_TEXT_KERNELS_UNICODE_CHAR_TOKENIZER_OP_H_ #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { class UnicodeCharTokenizerOp : public TensorOp { public: - UnicodeCharTokenizerOp() {} + static const bool kDefWithOffsets; + + explicit UnicodeCharTokenizerOp(const bool &with_offsets = kDefWithOffsets) : with_offsets_(with_offsets) {} ~UnicodeCharTokenizerOp() override = default; void Print(std::ostream &out) const override { out << "UnicodeCharTokenizerOp"; } - Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + Status Compute(const TensorRow &input, TensorRow *output) override; + + std::string Name() const override { return kUnicodeCharTokenizerOp; } + + private: + bool with_offsets_; }; } // namespace dataset diff --git a/mindspore/ccsrc/dataset/text/kernels/unicode_script_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/unicode_script_tokenizer_op.cc similarity index 61% rename from mindspore/ccsrc/dataset/text/kernels/unicode_script_tokenizer_op.cc 
rename to mindspore/ccsrc/minddata/dataset/text/kernels/unicode_script_tokenizer_op.cc index 97a4f1333dc..60fe8dd0e41 100644 --- a/mindspore/ccsrc/dataset/text/kernels/unicode_script_tokenizer_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/unicode_script_tokenizer_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/text/kernels/unicode_script_tokenizer_op.h" +#include "minddata/dataset/text/kernels/unicode_script_tokenizer_op.h" #include #include #include @@ -32,24 +32,28 @@ namespace mindspore { namespace dataset { const bool UnicodeScriptTokenizerOp::kDefKeepWhitespace = false; +const bool UnicodeScriptTokenizerOp::kDefWithOffsets = false; -Status UnicodeScriptTokenizerOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { - IO_CHECK(input, output); - if (input->Rank() != 0 || input->type() != DataType::DE_STRING) { +Status UnicodeScriptTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor"); + if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) { RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor"); } std::string_view str; - RETURN_IF_NOT_OK(input->GetItemAt(&str, {})); + RETURN_IF_NOT_OK(input[0]->GetItemAt(&str, {})); RuneStrArray runes; if (!DecodeRunesInString(str.data(), str.size(), runes)) { RETURN_STATUS_UNEXPECTED("Decode utf8 string failed."); } + std::shared_ptr token_tensor, offsets_start_tensor, offsets_limit_tensor; UScriptCode last_script = USCRIPT_INVALID_CODE; icu::ErrorCode status; int start = 0; int len = 0; std::vector splits; + std::vector offsets_start, offsets_limit; bool was_space = false; for (size_t i = 0; i < runes.size(); i++) { @@ -66,6 +70,8 @@ Status UnicodeScriptTokenizerOp::Compute(const std::shared_ptr &input, s if (len > 0 && (script != 
last_script || is_space != was_space)) { // 3) If keep_whitespace_ is false, all the whitespace characters will be discard if (keep_whitespace_ || !was_space) { + offsets_start.push_back(static_cast(start)); + offsets_limit.push_back(static_cast(start + len)); std::string temp(str.substr(start, len)); splits.emplace_back(std::move(temp)); } @@ -79,14 +85,29 @@ Status UnicodeScriptTokenizerOp::Compute(const std::shared_ptr &input, s } if (len > 0 && (keep_whitespace_ || !was_space)) { + offsets_start.push_back(static_cast(start)); + offsets_limit.push_back(static_cast(start + len)); std::string temp(str.substr(start, len)); splits.emplace_back(std::move(temp)); } // 4) If the input is empty scalar string, the output will be 1-D empty string. if (splits.empty()) { splits.emplace_back(""); + offsets_start.push_back(0); + offsets_limit.push_back(0); + } + token_tensor = std::make_shared(splits, TensorShape({(dsize_t)splits.size()})); + output->push_back(token_tensor); + if (with_offsets_) { + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_start[0]))); + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_limit[0]))); + output->push_back(offsets_start_tensor); + output->push_back(offsets_limit_tensor); } - *output = std::make_shared(splits, TensorShape({(dsize_t)splits.size()})); return Status::OK(); } } // namespace dataset diff --git a/mindspore/ccsrc/dataset/text/kernels/unicode_script_tokenizer_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/unicode_script_tokenizer_op.h similarity index 67% rename from mindspore/ccsrc/dataset/text/kernels/unicode_script_tokenizer_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/unicode_script_tokenizer_op.h index a77b0b3fa3e..fc3b9e620a2 
100644 --- a/mindspore/ccsrc/dataset/text/kernels/unicode_script_tokenizer_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/unicode_script_tokenizer_op.h @@ -16,10 +16,11 @@ #ifndef DATASET_TEXT_KERNELS_UNICODE_SCRIPT_TOKENIZER_OP_H_ #define DATASET_TEXT_KERNELS_UNICODE_SCRIPT_TOKENIZER_OP_H_ #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -27,17 +28,23 @@ namespace dataset { class UnicodeScriptTokenizerOp : public TensorOp { public: static const bool kDefKeepWhitespace; + static const bool kDefWithOffsets; - explicit UnicodeScriptTokenizerOp(bool keep_whitespace = kDefKeepWhitespace) : keep_whitespace_(keep_whitespace) {} + explicit UnicodeScriptTokenizerOp(const bool &keep_whitespace = kDefKeepWhitespace, + const bool &with_offsets = kDefWithOffsets) + : keep_whitespace_(keep_whitespace), with_offsets_(with_offsets) {} ~UnicodeScriptTokenizerOp() override = default; void Print(std::ostream &out) const override { out << "UnicodeScriptTokenizerOp"; } - Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + Status Compute(const TensorRow &input, TensorRow *output) override; + + std::string Name() const override { return kUnicodeScriptTokenizerOp; } private: bool keep_whitespace_; // If or not keep whitespace tokens + bool with_offsets_; }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/whitespace_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/whitespace_tokenizer_op.cc similarity index 51% rename from mindspore/ccsrc/dataset/text/kernels/whitespace_tokenizer_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/whitespace_tokenizer_op.cc index 35f3f8d0e23..d3bb32081e5 100644 --- 
a/mindspore/ccsrc/dataset/text/kernels/whitespace_tokenizer_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/whitespace_tokenizer_op.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/text/kernels/whitespace_tokenizer_op.h" +#include "minddata/dataset/text/kernels/whitespace_tokenizer_op.h" #include #include #include @@ -30,24 +30,33 @@ using cppjieba::RuneStrArray; namespace mindspore { namespace dataset { -Status WhitespaceTokenizerOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { - IO_CHECK(input, output); - if (input->Rank() != 0 || input->type() != DataType::DE_STRING) { + +const bool WhitespaceTokenizerOp::kDefWithOffsets = false; + +Status WhitespaceTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor"); + if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) { RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor"); } std::string_view str; - RETURN_IF_NOT_OK(input->GetItemAt(&str, {})); + RETURN_IF_NOT_OK(input[0]->GetItemAt(&str, {})); RuneStrArray runes; if (!DecodeRunesInString(str.data(), str.size(), runes)) { RETURN_STATUS_UNEXPECTED("Decode utf8 string failed."); } + + std::shared_ptr token_tensor, offsets_start_tensor, offsets_limit_tensor; + std::vector offsets_start, offsets_limit; std::vector splits; int start = 0; int len = 0; for (size_t i = 0; i < runes.size(); i++) { if (u_isUWhiteSpace(runes[i].rune)) { if (len > 0) { + offsets_start.push_back(static_cast(start)); + offsets_limit.push_back(static_cast(start + len)); std::string temp(str.substr(start, len)); splits.emplace_back(std::move(temp)); len = 0; @@ -60,13 +69,28 @@ Status WhitespaceTokenizerOp::Compute(const std::shared_ptr &input, std: } } if (len > 0) { + offsets_start.push_back(static_cast(start)); + 
offsets_limit.push_back(static_cast(start + len)); std::string temp(str.substr(start, len)); splits.emplace_back(std::move(temp)); } if (splits.empty()) { splits.emplace_back(""); + offsets_start.push_back(0); + offsets_limit.push_back(0); + } + token_tensor = std::make_shared(splits, TensorShape({(dsize_t)splits.size()})); + output->push_back(token_tensor); + if (with_offsets_) { + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_start[0]))); + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_limit[0]))); + output->push_back(offsets_start_tensor); + output->push_back(offsets_limit_tensor); } - *output = std::make_shared(splits, TensorShape({(dsize_t)splits.size()})); return Status::OK(); } } // namespace dataset diff --git a/mindspore/ccsrc/dataset/text/kernels/whitespace_tokenizer_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/whitespace_tokenizer_op.h similarity index 69% rename from mindspore/ccsrc/dataset/text/kernels/whitespace_tokenizer_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/whitespace_tokenizer_op.h index 6d0bab0bea6..7cc37fd705b 100644 --- a/mindspore/ccsrc/dataset/text/kernels/whitespace_tokenizer_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/whitespace_tokenizer_op.h @@ -16,23 +16,31 @@ #ifndef DATASET_TEXT_KERNELS_WHITESPACE_TOKENIZER_OP_H_ #define DATASET_TEXT_KERNELS_WHITESPACE_TOKENIZER_OP_H_ #include +#include -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { class WhitespaceTokenizerOp : public 
TensorOp { public: - WhitespaceTokenizerOp() {} + static const bool kDefWithOffsets; + + explicit WhitespaceTokenizerOp(const bool &with_offsets = kDefWithOffsets) : with_offsets_(with_offsets) {} ~WhitespaceTokenizerOp() override = default; void Print(std::ostream &out) const override { out << "WhitespaceTokenizerOp"; } - Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + Status Compute(const TensorRow &input, TensorRow *output) override; + + std::string Name() const override { return kWhitespaceTokenizerOp; } + + private: + bool with_offsets_; }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/kernels/wordpiece_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/wordpiece_tokenizer_op.cc similarity index 50% rename from mindspore/ccsrc/dataset/text/kernels/wordpiece_tokenizer_op.cc rename to mindspore/ccsrc/minddata/dataset/text/kernels/wordpiece_tokenizer_op.cc index e488c527cd4..f0bd448e398 100644 --- a/mindspore/ccsrc/dataset/text/kernels/wordpiece_tokenizer_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/wordpiece_tokenizer_op.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "dataset/text/kernels/wordpiece_tokenizer_op.h" +#include "minddata/dataset/text/kernels/wordpiece_tokenizer_op.h" #include #include @@ -24,13 +24,16 @@ namespace dataset { const char WordpieceTokenizerOp::kDefSuffixIndicator[] = "##"; const int WordpieceTokenizerOp::kDefMaxBytesPerToken = 100; const char WordpieceTokenizerOp::kDefUnknownToken[] = "[UNK]"; +const bool WordpieceTokenizerOp::kDefWithOffsets = false; WordpieceTokenizerOp::WordpieceTokenizerOp(const std::shared_ptr &vocab, const std::string &suffix_indicator, - const int &max_bytes_per_token, const std::string &unknown_token) + const int &max_bytes_per_token, const std::string &unknown_token, + const bool &with_offsets) : vocab_(vocab), suffix_indicator_(suffix_indicator), max_bytes_per_token_(max_bytes_per_token), - unknown_token_(unknown_token) {} + unknown_token_(unknown_token), + with_offsets_(with_offsets) {} Status WordpieceTokenizerOp::LookupWord(const std::string &input_token, const RuneStrArray &runes, const int start, bool *out_found, int *out_end) const { @@ -43,8 +46,7 @@ Status WordpieceTokenizerOp::LookupWord(const std::string &input_token, const Ru if (start > 0) { word = suffix_indicator_ + word; } - WordIdType default_id = -1; - if (vocab_->Lookup(word, default_id) != default_id) { + if (vocab_->Lookup(word) != Vocab::kNoTokenExists) { *out_found = true; break; } @@ -52,17 +54,22 @@ Status WordpieceTokenizerOp::LookupWord(const std::string &input_token, const Ru return Status::OK(); } -Status WordpieceTokenizerOp::FoundNoToken(const std::string &input_token, std::vector *out_tokens) const { +Status WordpieceTokenizerOp::FoundNoToken(const std::string &input_token, const uint32_t &basic_start, + std::vector *out_tokens, std::vector *offsets_start, + std::vector *offsets_limit) const { out_tokens->clear(); + offsets_start->push_back(basic_start); if (unknown_token_.empty()) { out_tokens->emplace_back(input_token); + offsets_limit->push_back(basic_start + 
input_token.length()); } else { out_tokens->emplace_back(unknown_token_); + offsets_limit->push_back(basic_start + input_token.length()); } return Status::OK(); } -Status WordpieceTokenizerOp::AddSubword(const std::string &input_token, const int start, const int end, +Status WordpieceTokenizerOp::AddSubword(const std::string &input_token, const int &start, const int &end, std::vector *out_tokens) const { CHECK_FAIL_RETURN_UNEXPECTED(start >= 0 && end > start && end <= input_token.size(), "Out of range"); std::string subword = input_token.substr(start, end - start); @@ -73,9 +80,19 @@ Status WordpieceTokenizerOp::AddSubword(const std::string &input_token, const in return Status::OK(); } -Status WordpieceTokenizerOp::GetTokens(const std::string &input_token, std::vector *out_tokens) const { +Status WordpieceTokenizerOp::GetTokens(const std::string &input_token, const uint32_t &basic_start, + std::vector *out_tokens, std::vector *offsets_start, + std::vector *offsets_limit) const { if (input_token.size() > max_bytes_per_token_) { - return FoundNoToken(input_token, out_tokens); + offsets_start->push_back(basic_start); + if (!unknown_token_.empty()) { + offsets_limit->push_back(basic_start + unknown_token_.size()); + out_tokens->emplace_back(unknown_token_); + } else { + out_tokens->emplace_back(input_token); + offsets_limit->push_back(basic_start + input_token.size()); + } + return Status::OK(); } RuneStrArray runes; if (!DecodeRunesInString(input_token.data(), input_token.size(), runes)) { @@ -87,29 +104,52 @@ Status WordpieceTokenizerOp::GetTokens(const std::string &input_token, std::vect RETURN_IF_NOT_OK(LookupWord(input_token, runes, start, &found, &end)); if (found) { RETURN_IF_NOT_OK(AddSubword(input_token, start, end, out_tokens)); + offsets_start->push_back(static_cast(basic_start + start)); + offsets_limit->push_back(static_cast(basic_start + end)); start = end; } else { - return FoundNoToken(input_token, out_tokens); + return FoundNoToken(input_token, 
basic_start, out_tokens, offsets_start, offsets_limit); } } return Status::OK(); } -Status WordpieceTokenizerOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { - IO_CHECK(input, output); - if (input->Rank() > 1 || input->type() != DataType::DE_STRING) { +Status WordpieceTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + if (input[0]->Rank() > 1 || input[0]->type() != DataType::DE_STRING) { RETURN_STATUS_UNEXPECTED("The input tensor should be scalar or 1-D string tensor"); } + dsize_t count = 0; std::vector out_tokens; - for (auto iter = input->begin(); iter != input->end(); iter++) { + std::vector offsets_start, offsets_limit; + std::shared_ptr token_tensor, offsets_start_tensor, offsets_limit_tensor; + for (auto iter = input[0]->begin(); iter != input[0]->end(); iter++) { + uint32_t basic_start = 0; std::vector temp_tokens; - RETURN_IF_NOT_OK(GetTokens(std::string(*iter), &temp_tokens)); + if (with_offsets_ && input.size() == 3) { + RETURN_IF_NOT_OK(input[1]->GetItemAt(&basic_start, {count, 0})); + } + RETURN_IF_NOT_OK(GetTokens(std::string(*iter), basic_start, &temp_tokens, &offsets_start, &offsets_limit)); out_tokens.insert(out_tokens.end(), temp_tokens.begin(), temp_tokens.end()); + count++; } if (out_tokens.empty()) { out_tokens.emplace_back(""); + offsets_start.push_back(0); + offsets_limit.push_back(0); + } + token_tensor = std::make_shared(out_tokens, TensorShape({(dsize_t)out_tokens.size()})); + output->push_back(token_tensor); + if (with_offsets_) { + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_start[0]))); + RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible, + TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32), + reinterpret_cast(&offsets_limit[0]))); + 
output->push_back(offsets_start_tensor); + output->push_back(offsets_limit_tensor); } - *output = std::make_shared(out_tokens, TensorShape({(dsize_t)out_tokens.size()})); return Status::OK(); } diff --git a/mindspore/ccsrc/dataset/text/kernels/wordpiece_tokenizer_op.h b/mindspore/ccsrc/minddata/dataset/text/kernels/wordpiece_tokenizer_op.h similarity index 65% rename from mindspore/ccsrc/dataset/text/kernels/wordpiece_tokenizer_op.h rename to mindspore/ccsrc/minddata/dataset/text/kernels/wordpiece_tokenizer_op.h index c9a75025c69..4f9c76f57e9 100644 --- a/mindspore/ccsrc/dataset/text/kernels/wordpiece_tokenizer_op.h +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/wordpiece_tokenizer_op.h @@ -22,10 +22,10 @@ #include "cppjieba/Unicode.hpp" -#include "dataset/core/tensor.h" -#include "dataset/kernels/tensor_op.h" -#include "dataset/text/vocab.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/text/vocab.h" +#include "minddata/dataset/util/status.h" using cppjieba::DecodeRunesInString; using cppjieba::RuneStrArray; @@ -37,27 +37,33 @@ class WordpieceTokenizerOp : public TensorOp { static const char kDefSuffixIndicator[]; static const int kDefMaxBytesPerToken; static const char kDefUnknownToken[]; + static const bool kDefWithOffsets; WordpieceTokenizerOp(const std::shared_ptr &vocab, const std::string &suffix_indicator = kDefSuffixIndicator, const int &max_bytes_per_token = kDefMaxBytesPerToken, - const std::string &unknown_token = kDefUnknownToken); + const std::string &unknown_token = kDefUnknownToken, const bool &with_offsets = kDefWithOffsets); ~WordpieceTokenizerOp() override = default; void Print(std::ostream &out) const override { out << "WordpieceTokenizerOp"; } - Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + Status Compute(const TensorRow &input, TensorRow *output) override; protected: - Status AddSubword(const 
std::string &input_token, const int start, const int end, + Status AddSubword(const std::string &input_token, const int &start, const int &end, std::vector *out_token) const; - Status FoundNoToken(const std::string &input_token, std::vector *out_tokens) const; + Status FoundNoToken(const std::string &input_token, const uint32_t &basic_start, std::vector *out_tokens, + std::vector *offsets_start, std::vector *offsets_limit) const; Status LookupWord(const std::string &input_token, const RuneStrArray &runes, const int start, bool *out_found, int *out_end) const; - Status GetTokens(const std::string &input_token, std::vector *out_tokens) const; + Status GetTokens(const std::string &input_token, const uint32_t &basic_start, std::vector *out_tokens, + std::vector *offsets_start, std::vector *offsets_limit) const; + + std::string Name() const override { return kWordpieceTokenizerOp; } private: const std::shared_ptr vocab_; const std::string suffix_indicator_; + const bool with_offsets_; const int max_bytes_per_token_; const std::string unknown_token_; }; diff --git a/mindspore/ccsrc/dataset/text/vocab.cc b/mindspore/ccsrc/minddata/dataset/text/vocab.cc similarity index 94% rename from mindspore/ccsrc/dataset/text/vocab.cc rename to mindspore/ccsrc/minddata/dataset/text/vocab.cc index 100dc9d6558..c1b7e6265c8 100644 --- a/mindspore/ccsrc/dataset/text/vocab.cc +++ b/mindspore/ccsrc/minddata/dataset/text/vocab.cc @@ -18,15 +18,15 @@ #include #include -#include "dataset/text/vocab.h" +#include "minddata/dataset/text/vocab.h" namespace mindspore { namespace dataset { Vocab::Vocab(std::unordered_map word2id) { word2id_ = std::move(word2id); } -WordIdType Vocab::Lookup(const WordType &word, WordIdType default_id) const { +WordIdType Vocab::Lookup(const WordType &word) const { auto itr = word2id_.find(word); - return itr == word2id_.end() ? default_id : itr->second; + return itr == word2id_.end() ? 
kNoTokenExists : itr->second; } Status Vocab::BuildFromPyList(const py::list &words, const py::list &special_tokens, bool prepend_special, @@ -100,5 +100,8 @@ void Vocab::append_word(const std::string &word) { word2id_[word] = word2id_.size(); } } + +const WordIdType Vocab::kNoTokenExists = -1; + } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/text/vocab.h b/mindspore/ccsrc/minddata/dataset/text/vocab.h similarity index 92% rename from mindspore/ccsrc/dataset/text/vocab.h rename to mindspore/ccsrc/minddata/dataset/text/vocab.h index fc21c380a2b..6bf6c488c51 100644 --- a/mindspore/ccsrc/dataset/text/vocab.h +++ b/mindspore/ccsrc/minddata/dataset/text/vocab.h @@ -22,7 +22,7 @@ #include #include -#include "dataset/util/status.h" +#include "minddata/dataset/util/status.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" @@ -61,12 +61,7 @@ class Vocab { // @param const WordType word - word to look up // @param WordIdType default_id - word id to return to user when its not in the vocab // @return WordIdType, word_id - WordIdType Lookup(const WordType &word, WordIdType default_id) const; - - // reverse lookup, lookup the word based on its id - // @param WordIdType id - word id to lookup to - // @return WordType the word - WordType Lookup(WordIdType id); + WordIdType Lookup(const WordType &word) const; // constructor, shouldn't be called directly, can't be private due to std::make_unique() // @param std::unordered_map map - sanitized word2id map @@ -81,6 +76,8 @@ class Vocab { // destructor ~Vocab() = default; + static const WordIdType kNoTokenExists; + private: std::unordered_map word2id_; }; diff --git a/mindspore/ccsrc/dataset/util/.gitignore b/mindspore/ccsrc/minddata/dataset/util/.gitignore similarity index 100% rename from mindspore/ccsrc/dataset/util/.gitignore rename to mindspore/ccsrc/minddata/dataset/util/.gitignore diff --git a/mindspore/ccsrc/dataset/util/CMakeLists.txt 
b/mindspore/ccsrc/minddata/dataset/util/CMakeLists.txt similarity index 100% rename from mindspore/ccsrc/dataset/util/CMakeLists.txt rename to mindspore/ccsrc/minddata/dataset/util/CMakeLists.txt diff --git a/mindspore/ccsrc/dataset/util/README.md b/mindspore/ccsrc/minddata/dataset/util/README.md similarity index 100% rename from mindspore/ccsrc/dataset/util/README.md rename to mindspore/ccsrc/minddata/dataset/util/README.md diff --git a/mindspore/ccsrc/dataset/util/allocator.h b/mindspore/ccsrc/minddata/dataset/util/allocator.h similarity index 95% rename from mindspore/ccsrc/dataset/util/allocator.h rename to mindspore/ccsrc/minddata/dataset/util/allocator.h index 50a9cadbe3f..b5eaed97a69 100644 --- a/mindspore/ccsrc/dataset/util/allocator.h +++ b/mindspore/ccsrc/minddata/dataset/util/allocator.h @@ -21,7 +21,7 @@ #include #include #include -#include "dataset/util/memory_pool.h" +#include "minddata/dataset/util/memory_pool.h" namespace mindspore { namespace dataset { @@ -87,8 +87,9 @@ class Allocator { std::shared_ptr pool_; }; /// \brief It is a wrapper of unique_ptr with a custom allocator and acts like std::lock_guard such that the memory will -/// be released when the object goes out of scope \tparam T The type of object to be allocated \tparam C Allocator. -/// Default to std::allocator +/// be released when the object goes out of scope +/// \tparam T The type of object to be allocated +/// \tparam C Allocator. 
Default to std::allocator template > class MemGuard { public: @@ -168,7 +169,7 @@ class MemGuard { private: allocator alloc_; - std::unique_ptr> ptr_; + std::unique_ptr ptr_; size_t n_; }; } // namespace dataset diff --git a/mindspore/ccsrc/dataset/util/arena.cc b/mindspore/ccsrc/minddata/dataset/util/arena.cc similarity index 98% rename from mindspore/ccsrc/dataset/util/arena.cc rename to mindspore/ccsrc/minddata/dataset/util/arena.cc index af4f5226788..87a9c614a83 100644 --- a/mindspore/ccsrc/dataset/util/arena.cc +++ b/mindspore/ccsrc/minddata/dataset/util/arena.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/arena.h" +#include "minddata/dataset/util/arena.h" #include #include -#include "dataset/util/system_pool.h" +#include "minddata/dataset/util/system_pool.h" #include "./securec.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/dataset/util/arena.h b/mindspore/ccsrc/minddata/dataset/util/arena.h similarity index 97% rename from mindspore/ccsrc/dataset/util/arena.h rename to mindspore/ccsrc/minddata/dataset/util/arena.h index 8c5d1e1093f..8887757af15 100644 --- a/mindspore/ccsrc/dataset/util/arena.h +++ b/mindspore/ccsrc/minddata/dataset/util/arena.h @@ -19,8 +19,8 @@ #include #include #include -#include "dataset/util/memory_pool.h" -#include "dataset/util/treap.h" +#include "minddata/dataset/util/memory_pool.h" +#include "minddata/dataset/util/treap.h" #define ARENA_LOG_BLK_SZ (6u) #define ARENA_BLK_SZ (static_cast(1u << ARENA_LOG_BLK_SZ)) diff --git a/mindspore/ccsrc/dataset/util/auto_index.h b/mindspore/ccsrc/minddata/dataset/util/auto_index.h similarity index 96% rename from mindspore/ccsrc/dataset/util/auto_index.h rename to mindspore/ccsrc/minddata/dataset/util/auto_index.h index 5c43ecfd80b..0fe55159e62 100644 --- a/mindspore/ccsrc/dataset/util/auto_index.h +++ b/mindspore/ccsrc/minddata/dataset/util/auto_index.h @@ -21,8 +21,8 @@ 
#include #include -#include "dataset/util/btree.h" -#include "dataset/util/system_pool.h" +#include "minddata/dataset/util/btree.h" +#include "minddata/dataset/util/system_pool.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/bit.h b/mindspore/ccsrc/minddata/dataset/util/bit.h similarity index 100% rename from mindspore/ccsrc/dataset/util/bit.h rename to mindspore/ccsrc/minddata/dataset/util/bit.h diff --git a/mindspore/ccsrc/dataset/util/btree.h b/mindspore/ccsrc/minddata/dataset/util/btree.h similarity index 98% rename from mindspore/ccsrc/dataset/util/btree.h rename to mindspore/ccsrc/minddata/dataset/util/btree.h index ccf642e366d..828976a0a13 100644 --- a/mindspore/ccsrc/dataset/util/btree.h +++ b/mindspore/ccsrc/minddata/dataset/util/btree.h @@ -23,12 +23,12 @@ #include #include #include "./securec.h" -#include "dataset/util/allocator.h" -#include "dataset/util/list.h" -#include "dataset/util/lock.h" -#include "dataset/util/memory_pool.h" -#include "dataset/util/services.h" -#include "dataset/util/status.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/list.h" +#include "minddata/dataset/util/lock.h" +#include "minddata/dataset/util/memory_pool.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/btree_impl.tpp b/mindspore/ccsrc/minddata/dataset/util/btree_impl.tpp similarity index 100% rename from mindspore/ccsrc/dataset/util/btree_impl.tpp rename to mindspore/ccsrc/minddata/dataset/util/btree_impl.tpp diff --git a/mindspore/ccsrc/dataset/util/btree_iterator.tpp b/mindspore/ccsrc/minddata/dataset/util/btree_iterator.tpp similarity index 100% rename from mindspore/ccsrc/dataset/util/btree_iterator.tpp rename to mindspore/ccsrc/minddata/dataset/util/btree_iterator.tpp diff --git a/mindspore/ccsrc/dataset/util/buddy.cc b/mindspore/ccsrc/minddata/dataset/util/buddy.cc 
similarity index 98% rename from mindspore/ccsrc/dataset/util/buddy.cc rename to mindspore/ccsrc/minddata/dataset/util/buddy.cc index 540fa993d68..d4f5434f811 100644 --- a/mindspore/ccsrc/dataset/util/buddy.cc +++ b/mindspore/ccsrc/minddata/dataset/util/buddy.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/buddy.h" +#include "minddata/dataset/util/buddy.h" #include #include -#include "dataset/util/memory_pool.h" -#include "dataset/util/system_pool.h" +#include "minddata/dataset/util/memory_pool.h" +#include "minddata/dataset/util/system_pool.h" #include "utils/log_adapter.h" #include "./securec.h" diff --git a/mindspore/ccsrc/dataset/util/buddy.h b/mindspore/ccsrc/minddata/dataset/util/buddy.h similarity index 98% rename from mindspore/ccsrc/dataset/util/buddy.h rename to mindspore/ccsrc/minddata/dataset/util/buddy.h index 08c05cbbdbe..b1bcd3ce416 100644 --- a/mindspore/ccsrc/dataset/util/buddy.h +++ b/mindspore/ccsrc/minddata/dataset/util/buddy.h @@ -22,7 +22,7 @@ #include #include #include -#include "dataset/util/status.h" +#include "minddata/dataset/util/status.h" using addr_t = int64_t; using rel_addr_t = int32_t; diff --git a/mindspore/ccsrc/dataset/util/cache_pool.cc b/mindspore/ccsrc/minddata/dataset/util/cache_pool.cc similarity index 94% rename from mindspore/ccsrc/dataset/util/cache_pool.cc rename to mindspore/ccsrc/minddata/dataset/util/cache_pool.cc index 92504cd0634..22fb72eb8a0 100644 --- a/mindspore/ccsrc/dataset/util/cache_pool.cc +++ b/mindspore/ccsrc/minddata/dataset/util/cache_pool.cc @@ -15,8 +15,8 @@ */ #include #include "common/utils.h" -#include "dataset/util/cache_pool.h" -#include "dataset/util/services.h" +#include "minddata/dataset/util/cache_pool.h" +#include "minddata/dataset/util/services.h" namespace mindspore { namespace dataset { @@ -98,11 +98,6 @@ Status CachePool::Insert(const std::vector &buf, CachePool::key_t } catch 
(std::bad_alloc &e) { if (sm_ != nullptr) { RETURN_IF_NOT_OK(sm_->Write(&bl.storage_key, buf)); - // We have an assumption 0 is not a valid key from the design of AutoIndexObj. - // Make sure it is not 0. - if (bl.storage_key == 0) { - RETURN_STATUS_UNEXPECTED("Key 0 is returned which is unexpected"); - } } else { return Status(StatusCode::kOutOfMemory, __LINE__, __FILE__); } diff --git a/mindspore/ccsrc/dataset/util/cache_pool.h b/mindspore/ccsrc/minddata/dataset/util/cache_pool.h similarity index 95% rename from mindspore/ccsrc/dataset/util/cache_pool.h rename to mindspore/ccsrc/minddata/dataset/util/cache_pool.h index d35617d0e4b..cdb6da16b63 100644 --- a/mindspore/ccsrc/dataset/util/cache_pool.h +++ b/mindspore/ccsrc/minddata/dataset/util/cache_pool.h @@ -20,11 +20,11 @@ #include #include #include -#include "dataset/util/allocator.h" -#include "dataset/util/service.h" -#include "dataset/util/slice.h" -#include "dataset/util/storage_manager.h" -#include "dataset/util/auto_index.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/service.h" +#include "minddata/dataset/util/slice.h" +#include "minddata/dataset/util/storage_manager.h" +#include "minddata/dataset/util/auto_index.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/circular_pool.cc b/mindspore/ccsrc/minddata/dataset/util/circular_pool.cc similarity index 97% rename from mindspore/ccsrc/dataset/util/circular_pool.cc rename to mindspore/ccsrc/minddata/dataset/util/circular_pool.cc index 0c68dab81bf..f99e6de2f1f 100644 --- a/mindspore/ccsrc/dataset/util/circular_pool.cc +++ b/mindspore/ccsrc/minddata/dataset/util/circular_pool.cc @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/util/circular_pool.h" +#include "minddata/dataset/util/circular_pool.h" #include #include #include #include "./securec.h" -#include "dataset/util/system_pool.h" +#include "minddata/dataset/util/system_pool.h" #include "utils/log_adapter.h" namespace mindspore { @@ -88,6 +88,9 @@ Status CircularPool::Allocate(size_t n, void **p) { while (cirIt.has_next()) { auto it = cirIt.Next(); Arena *ba = it->get(); + if (ba->get_max_size() < n) { + return Status(StatusCode::kOutOfMemory); + } // If we are asked to move forward the tail if (move_tail) { Arena *expected = cirIt.cur_tail_; diff --git a/mindspore/ccsrc/dataset/util/circular_pool.h b/mindspore/ccsrc/minddata/dataset/util/circular_pool.h similarity index 95% rename from mindspore/ccsrc/dataset/util/circular_pool.h rename to mindspore/ccsrc/minddata/dataset/util/circular_pool.h index 3c526597991..a63afbd6914 100644 --- a/mindspore/ccsrc/dataset/util/circular_pool.h +++ b/mindspore/ccsrc/minddata/dataset/util/circular_pool.h @@ -19,9 +19,9 @@ #include #include #include -#include "dataset/util/memory_pool.h" -#include "dataset/util/arena.h" -#include "dataset/util/lock.h" +#include "minddata/dataset/util/memory_pool.h" +#include "minddata/dataset/util/arena.h" +#include "minddata/dataset/util/lock.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/cond_var.cc b/mindspore/ccsrc/minddata/dataset/util/cond_var.cc similarity index 94% rename from mindspore/ccsrc/dataset/util/cond_var.cc rename to mindspore/ccsrc/minddata/dataset/util/cond_var.cc index 8b1099fb717..b7c7b76cae4 100644 --- a/mindspore/ccsrc/dataset/util/cond_var.cc +++ b/mindspore/ccsrc/minddata/dataset/util/cond_var.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/util/cond_var.h" +#include "minddata/dataset/util/cond_var.h" #include #include -#include "dataset/util/services.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/cond_var.h b/mindspore/ccsrc/minddata/dataset/util/cond_var.h similarity index 90% rename from mindspore/ccsrc/dataset/util/cond_var.h rename to mindspore/ccsrc/minddata/dataset/util/cond_var.h index b23dcd566ef..88fcad24a29 100644 --- a/mindspore/ccsrc/dataset/util/cond_var.h +++ b/mindspore/ccsrc/minddata/dataset/util/cond_var.h @@ -21,9 +21,9 @@ #include #include #include -#include "dataset/util/intrp_resource.h" -#include "dataset/util/intrp_service.h" -#include "dataset/util/status.h" +#include "minddata/dataset/util/intrp_resource.h" +#include "minddata/dataset/util/intrp_service.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/intrp_resource.h b/mindspore/ccsrc/minddata/dataset/util/intrp_resource.h similarity index 97% rename from mindspore/ccsrc/dataset/util/intrp_resource.h rename to mindspore/ccsrc/minddata/dataset/util/intrp_resource.h index 52024cb90a1..9d78e2cd32a 100644 --- a/mindspore/ccsrc/dataset/util/intrp_resource.h +++ b/mindspore/ccsrc/minddata/dataset/util/intrp_resource.h @@ -17,7 +17,7 @@ #define DATASET_UTIL_INTRP_RESOURCE_H_ #include -#include "dataset/util/status.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/intrp_service.cc b/mindspore/ccsrc/minddata/dataset/util/intrp_service.cc similarity index 95% rename from mindspore/ccsrc/dataset/util/intrp_service.cc rename to mindspore/ccsrc/minddata/dataset/util/intrp_service.cc index da8dde992c7..a82c82cdc92 100644 --- a/mindspore/ccsrc/dataset/util/intrp_service.cc +++ 
b/mindspore/ccsrc/minddata/dataset/util/intrp_service.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/intrp_service.h" +#include "minddata/dataset/util/intrp_service.h" #include #include "common/utils.h" -#include "dataset/util/services.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/intrp_service.h b/mindspore/ccsrc/minddata/dataset/util/intrp_service.h similarity index 87% rename from mindspore/ccsrc/dataset/util/intrp_service.h rename to mindspore/ccsrc/minddata/dataset/util/intrp_service.h index de1d5eb753a..cb6bf30c731 100644 --- a/mindspore/ccsrc/dataset/util/intrp_service.h +++ b/mindspore/ccsrc/minddata/dataset/util/intrp_service.h @@ -21,11 +21,11 @@ #include #include #include -#include "dataset/util/allocator.h" -#include "dataset/util/intrp_resource.h" -#include "dataset/util/service.h" -#include "dataset/util/services.h" -#include "dataset/util/status.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/intrp_resource.h" +#include "minddata/dataset/util/service.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/status.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/dataset/util/list.h b/mindspore/ccsrc/minddata/dataset/util/list.h similarity index 100% rename from mindspore/ccsrc/dataset/util/list.h rename to mindspore/ccsrc/minddata/dataset/util/list.h diff --git a/mindspore/ccsrc/dataset/util/lock.cc b/mindspore/ccsrc/minddata/dataset/util/lock.cc similarity index 99% rename from mindspore/ccsrc/dataset/util/lock.cc rename to mindspore/ccsrc/minddata/dataset/util/lock.cc index bde9d84005b..5302196a46c 100644 --- a/mindspore/ccsrc/dataset/util/lock.cc +++ b/mindspore/ccsrc/minddata/dataset/util/lock.cc @@ 
-13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/lock.h" +#include "minddata/dataset/util/lock.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/util/lock.h b/mindspore/ccsrc/minddata/dataset/util/lock.h similarity index 100% rename from mindspore/ccsrc/dataset/util/lock.h rename to mindspore/ccsrc/minddata/dataset/util/lock.h diff --git a/mindspore/ccsrc/dataset/util/memory_pool.cc b/mindspore/ccsrc/minddata/dataset/util/memory_pool.cc similarity index 97% rename from mindspore/ccsrc/dataset/util/memory_pool.cc rename to mindspore/ccsrc/minddata/dataset/util/memory_pool.cc index 5d66b4bd6de..0e1be9d798f 100644 --- a/mindspore/ccsrc/dataset/util/memory_pool.cc +++ b/mindspore/ccsrc/minddata/dataset/util/memory_pool.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/memory_pool.h" +#include "minddata/dataset/util/memory_pool.h" #include "./securec.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/util/memory_pool.h b/mindspore/ccsrc/minddata/dataset/util/memory_pool.h similarity index 97% rename from mindspore/ccsrc/dataset/util/memory_pool.h rename to mindspore/ccsrc/minddata/dataset/util/memory_pool.h index ee1da3bda15..c7cc4731092 100644 --- a/mindspore/ccsrc/dataset/util/memory_pool.h +++ b/mindspore/ccsrc/minddata/dataset/util/memory_pool.h @@ -19,7 +19,7 @@ #include #include #include -#include "dataset/util/status.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/path.cc b/mindspore/ccsrc/minddata/dataset/util/path.cc similarity index 99% rename from mindspore/ccsrc/dataset/util/path.cc rename to mindspore/ccsrc/minddata/dataset/util/path.cc index cdd23437994..8740ecb8e0f 100644 --- a/mindspore/ccsrc/dataset/util/path.cc +++ 
b/mindspore/ccsrc/minddata/dataset/util/path.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/path.h" +#include "minddata/dataset/util/path.h" #include #include diff --git a/mindspore/ccsrc/dataset/util/path.h b/mindspore/ccsrc/minddata/dataset/util/path.h similarity index 98% rename from mindspore/ccsrc/dataset/util/path.h rename to mindspore/ccsrc/minddata/dataset/util/path.h index fbf65b8c236..8bc07ca8f3e 100644 --- a/mindspore/ccsrc/dataset/util/path.h +++ b/mindspore/ccsrc/minddata/dataset/util/path.h @@ -20,7 +20,7 @@ #include #include -#include "dataset/util/status.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/queue.h b/mindspore/ccsrc/minddata/dataset/util/queue.h similarity index 96% rename from mindspore/ccsrc/dataset/util/queue.h rename to mindspore/ccsrc/minddata/dataset/util/queue.h index 7fca93d944a..7a0a987499a 100644 --- a/mindspore/ccsrc/dataset/util/queue.h +++ b/mindspore/ccsrc/minddata/dataset/util/queue.h @@ -26,10 +26,10 @@ #include "common/utils.h" #include "utils/log_adapter.h" -#include "dataset/util/allocator.h" -#include "dataset/util/services.h" -#include "dataset/util/cond_var.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/cond_var.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { @@ -182,6 +182,9 @@ class Queue { arr_[k].~T(); } } + for (uint64_t i = 0; i < sz_; i++) { + std::allocator_traits>::construct(alloc_, &(arr_[i])); + } empty_cv_.ResetIntrpState(); full_cv_.ResetIntrpState(); head_ = 0; diff --git a/mindspore/ccsrc/dataset/util/random.h b/mindspore/ccsrc/minddata/dataset/util/random.h similarity index 95% rename from mindspore/ccsrc/dataset/util/random.h rename to 
mindspore/ccsrc/minddata/dataset/util/random.h index 957a4214a82..d2658f67ecb 100644 --- a/mindspore/ccsrc/dataset/util/random.h +++ b/mindspore/ccsrc/minddata/dataset/util/random.h @@ -26,8 +26,8 @@ #include #include -#include "dataset/core/config_manager.h" -#include "dataset/core/global_context.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/global_context.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/util/semaphore.cc b/mindspore/ccsrc/minddata/dataset/util/semaphore.cc similarity index 93% rename from mindspore/ccsrc/dataset/util/semaphore.cc rename to mindspore/ccsrc/minddata/dataset/util/semaphore.cc index 36ddf5511d9..5dadd98f3ca 100644 --- a/mindspore/ccsrc/dataset/util/semaphore.cc +++ b/mindspore/ccsrc/minddata/dataset/util/semaphore.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/semaphore.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/semaphore.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/semaphore.h b/mindspore/ccsrc/minddata/dataset/util/semaphore.h similarity index 97% rename from mindspore/ccsrc/dataset/util/semaphore.h rename to mindspore/ccsrc/minddata/dataset/util/semaphore.h index 07b9e83e7fb..d07398acb11 100644 --- a/mindspore/ccsrc/dataset/util/semaphore.h +++ b/mindspore/ccsrc/minddata/dataset/util/semaphore.h @@ -16,7 +16,7 @@ #ifndef DATASET_UTIL_SEMAPHORE_H_ #define DATASET_UTIL_SEMAPHORE_H_ -#include "dataset/util/cond_var.h" +#include "minddata/dataset/util/cond_var.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/service.cc b/mindspore/ccsrc/minddata/dataset/util/service.cc similarity index 98% rename from mindspore/ccsrc/dataset/util/service.cc rename to 
mindspore/ccsrc/minddata/dataset/util/service.cc index c89f7287f61..19d60ab47af 100644 --- a/mindspore/ccsrc/dataset/util/service.cc +++ b/mindspore/ccsrc/minddata/dataset/util/service.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/service.h" +#include "minddata/dataset/util/service.h" #include namespace mindspore { diff --git a/mindspore/ccsrc/dataset/util/service.h b/mindspore/ccsrc/minddata/dataset/util/service.h similarity index 94% rename from mindspore/ccsrc/dataset/util/service.h rename to mindspore/ccsrc/minddata/dataset/util/service.h index 1113fc1d14c..2b9c7197fea 100644 --- a/mindspore/ccsrc/dataset/util/service.h +++ b/mindspore/ccsrc/minddata/dataset/util/service.h @@ -17,8 +17,8 @@ #define DATASET_UTIL_SERVICE_H_ #include -#include "dataset/util/lock.h" -#include "dataset/util/status.h" +#include "minddata/dataset/util/lock.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/services.cc b/mindspore/ccsrc/minddata/dataset/util/services.cc similarity index 68% rename from mindspore/ccsrc/dataset/util/services.cc rename to mindspore/ccsrc/minddata/dataset/util/services.cc index 6516deea41c..547773e0f12 100644 --- a/mindspore/ccsrc/dataset/util/services.cc +++ b/mindspore/ccsrc/minddata/dataset/util/services.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/util/services.h" +#include "minddata/dataset/util/services.h" #include #if !defined(_WIN32) && !defined(_WIN64) @@ -22,11 +22,11 @@ #include #endif #include -#include "dataset/util/circular_pool.h" -#include "dataset/util/random.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/engine/cache/cache_server.h" +#include "minddata/dataset/util/circular_pool.h" +#include "minddata/dataset/util/random.h" +#include "minddata/dataset/util/task_manager.h" -#define SLOT_TASK_MGR 0 namespace mindspore { namespace dataset { std::unique_ptr Services::instance_ = nullptr; @@ -61,15 +61,25 @@ std::string Services::GetUniqueID() { TaskManager &Services::getTaskMgrInstance() { Services &sm = GetInstance(); - return *(static_cast(sm.sa_[SLOT_TASK_MGR])); + return *(static_cast(sm.sa_[kSlotTaskMgr_])); +} + +CacheServer &Services::getCacheServer() { + Services &sm = GetInstance(); + return *(static_cast(sm.sa_[kSlotCacheMgr_])); } Status Services::CreateAllInstances() { // In order, TaskMgr, BufferMgr Status rc; - sa_[SLOT_TASK_MGR] = new (&rc, pool_) TaskManager(); + sa_[kSlotTaskMgr_] = new (&rc, pool_) TaskManager(); RETURN_IF_NOT_OK(rc); - rc = sa_[SLOT_TASK_MGR]->ServiceStart(); + rc = sa_[kSlotTaskMgr_]->ServiceStart(); + RETURN_IF_NOT_OK(rc); + // TODO(jesse) : Get the parameters from config file. 
Right now spill to /tmp and spawn 3 workers + sa_[kSlotCacheMgr_] = new (&rc, pool_) CacheServer("/tmp", 3); + RETURN_IF_NOT_OK(rc); + rc = sa_[kSlotCacheMgr_]->ServiceStart(); return rc; } @@ -83,8 +93,14 @@ Services::Services() : pool_(nullptr), sa_{nullptr} { Services::~Services() noexcept { try { // In reverse order - TaskManager *tm = static_cast(sa_[SLOT_TASK_MGR]); - if (tm) { + CacheServer *cs = static_cast(sa_[kSlotCacheMgr_]); + if (cs != nullptr) { + (void)cs->ServiceStop(); + cs->~CacheServer(); + pool_->Deallocate(cs); + } + TaskManager *tm = static_cast(sa_[kSlotTaskMgr_]); + if (tm != nullptr) { (void)tm->ServiceStop(); tm->~TaskManager(); pool_->Deallocate(tm); diff --git a/mindspore/ccsrc/dataset/util/services.h b/mindspore/ccsrc/minddata/dataset/util/services.h similarity index 88% rename from mindspore/ccsrc/dataset/util/services.h rename to mindspore/ccsrc/minddata/dataset/util/services.h index e19f44dccc4..c7adea0b6e0 100644 --- a/mindspore/ccsrc/dataset/util/services.h +++ b/mindspore/ccsrc/minddata/dataset/util/services.h @@ -19,15 +19,15 @@ #include #include #include -#include "dataset/util/memory_pool.h" -#include "dataset/util/allocator.h" -#include "dataset/util/service.h" +#include "minddata/dataset/util/memory_pool.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/service.h" #define UNIQUEID_LEN 36 namespace mindspore { namespace dataset { class TaskManager; - +class CacheServer; class Services { public: static Status CreateInstance() { @@ -61,6 +61,8 @@ class Services { static TaskManager &getTaskMgrInstance(); + static CacheServer &getCacheServer(); + std::shared_ptr GetServiceMemPool() { return pool_; } #if !defined(_WIN32) && !defined(_WIN64) @@ -87,7 +89,9 @@ class Services { // We use pointers here instead of unique_ptr because we // want to have ultimate control on the order of // construction and destruction. 
- static constexpr int kNumServices_ = 1; + static constexpr int kSlotTaskMgr_ = 0; + static constexpr int kSlotCacheMgr_ = 1; + static constexpr int kNumServices_ = 2; Service *sa_[kNumServices_]; Services(); diff --git a/mindspore/ccsrc/dataset/util/sig_handler.cc b/mindspore/ccsrc/minddata/dataset/util/sig_handler.cc similarity index 94% rename from mindspore/ccsrc/dataset/util/sig_handler.cc rename to mindspore/ccsrc/minddata/dataset/util/sig_handler.cc index 644a6330665..eed3b4ee4de 100644 --- a/mindspore/ccsrc/dataset/util/sig_handler.cc +++ b/mindspore/ccsrc/minddata/dataset/util/sig_handler.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/sig_handler.h" +#include "minddata/dataset/util/sig_handler.h" #include #include #if !defined(_WIN32) && !defined(_WIN64) #include #endif #include -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/sig_handler.h b/mindspore/ccsrc/minddata/dataset/util/sig_handler.h similarity index 100% rename from mindspore/ccsrc/dataset/util/sig_handler.h rename to mindspore/ccsrc/minddata/dataset/util/sig_handler.h diff --git a/mindspore/ccsrc/dataset/util/slice.cc b/mindspore/ccsrc/minddata/dataset/util/slice.cc similarity index 97% rename from mindspore/ccsrc/dataset/util/slice.cc rename to mindspore/ccsrc/minddata/dataset/util/slice.cc index f1798b4f44a..beff2b3dd26 100644 --- a/mindspore/ccsrc/dataset/util/slice.cc +++ b/mindspore/ccsrc/minddata/dataset/util/slice.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/util/slice.h" +#include "minddata/dataset/util/slice.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/slice.h b/mindspore/ccsrc/minddata/dataset/util/slice.h similarity index 95% rename from mindspore/ccsrc/dataset/util/slice.h rename to mindspore/ccsrc/minddata/dataset/util/slice.h index 127df23cfab..1caee0f816b 100644 --- a/mindspore/ccsrc/dataset/util/slice.h +++ b/mindspore/ccsrc/minddata/dataset/util/slice.h @@ -20,8 +20,8 @@ #include #include #include "./securec.h" -#include "dataset/util/allocator.h" -#include "dataset/util/status.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { /// \brief A ReadableSlice wraps a const pointer in memory and its size. @@ -31,6 +31,10 @@ class ReadableSlice { public: ReadableSlice() : ptr_(nullptr), sz_(0) {} ReadableSlice(const void *ptr, size_t sz) : ptr_(ptr), sz_(sz) {} + + /// \brief Destructor + ~ReadableSlice() = default; + ReadableSlice(const ReadableSlice &src, off64_t offset, size_t len) { ptr_ = static_cast(src.GetPointer()) + offset; sz_ = len; @@ -89,6 +93,8 @@ class WritableSlice : public ReadableSlice { WritableSlice(const WritableSlice &src, off64_t offset, size_t len); WritableSlice(const WritableSlice &src, off64_t offset); WritableSlice(const WritableSlice &lhs) : ReadableSlice(lhs) { mutable_data_ = lhs.mutable_data_; } + /// \brief Destructor + ~WritableSlice() = default; WritableSlice &operator=(const WritableSlice &lhs) { if (this != &lhs) { mutable_data_ = lhs.mutable_data_; diff --git a/mindspore/ccsrc/dataset/util/status.cc b/mindspore/ccsrc/minddata/dataset/util/status.cc similarity index 97% rename from mindspore/ccsrc/dataset/util/status.cc rename to mindspore/ccsrc/minddata/dataset/util/status.cc index 27e9dfbc83b..3fc498b701d 100644 --- a/mindspore/ccsrc/dataset/util/status.cc +++ b/mindspore/ccsrc/minddata/dataset/util/status.cc @@ -13,10 +13,10 @@ * 
See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/status.h" +#include "minddata/dataset/util/status.h" #include #include "common/utils.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/status.h b/mindspore/ccsrc/minddata/dataset/util/status.h similarity index 100% rename from mindspore/ccsrc/dataset/util/status.h rename to mindspore/ccsrc/minddata/dataset/util/status.h diff --git a/mindspore/ccsrc/dataset/util/storage_container.cc b/mindspore/ccsrc/minddata/dataset/util/storage_container.cc similarity index 97% rename from mindspore/ccsrc/dataset/util/storage_container.cc rename to mindspore/ccsrc/minddata/dataset/util/storage_container.cc index 3a4c13e2d9e..506495227d9 100644 --- a/mindspore/ccsrc/dataset/util/storage_container.cc +++ b/mindspore/ccsrc/minddata/dataset/util/storage_container.cc @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/util/storage_container.h" +#include "minddata/dataset/util/storage_container.h" #include #include #include #include #include "common/utils.h" -#include "dataset/util/path.h" -#include "dataset/util/status.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/status.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/util/storage_container.h b/mindspore/ccsrc/minddata/dataset/util/storage_container.h similarity index 90% rename from mindspore/ccsrc/dataset/util/storage_container.h rename to mindspore/ccsrc/minddata/dataset/util/storage_container.h index 07e41bd66a7..a304012b609 100644 --- a/mindspore/ccsrc/dataset/util/storage_container.h +++ b/mindspore/ccsrc/minddata/dataset/util/storage_container.h @@ -22,11 +22,11 @@ #include #include #include -#include "dataset/util/system_pool.h" -#include "dataset/util/buddy.h" -#include "dataset/util/path.h" -#include "dataset/util/slice.h" -#include "dataset/util/status.h" +#include "minddata/dataset/util/system_pool.h" +#include "minddata/dataset/util/buddy.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/slice.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/storage_manager.cc b/mindspore/ccsrc/minddata/dataset/util/storage_manager.cc similarity index 97% rename from mindspore/ccsrc/dataset/util/storage_manager.cc rename to mindspore/ccsrc/minddata/dataset/util/storage_manager.cc index 1d958576ba5..2f85d00a453 100644 --- a/mindspore/ccsrc/dataset/util/storage_manager.cc +++ b/mindspore/ccsrc/minddata/dataset/util/storage_manager.cc @@ -13,15 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/util/storage_manager.h" +#include "minddata/dataset/util/storage_manager.h" #include #include #include #include #include "common/utils.h" -#include "dataset/util/path.h" -#include "dataset/util/services.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/services.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/util/storage_manager.h b/mindspore/ccsrc/minddata/dataset/util/storage_manager.h similarity index 85% rename from mindspore/ccsrc/dataset/util/storage_manager.h rename to mindspore/ccsrc/minddata/dataset/util/storage_manager.h index 075ac713d2c..e79e7c6e63c 100644 --- a/mindspore/ccsrc/dataset/util/storage_manager.h +++ b/mindspore/ccsrc/minddata/dataset/util/storage_manager.h @@ -21,14 +21,14 @@ #include #include #include -#include "dataset/util/allocator.h" -#include "dataset/util/auto_index.h" -#include "dataset/util/lock.h" -#include "dataset/util/memory_pool.h" -#include "dataset/util/path.h" -#include "dataset/util/service.h" -#include "dataset/util/slice.h" -#include "dataset/util/storage_container.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/auto_index.h" +#include "minddata/dataset/util/lock.h" +#include "minddata/dataset/util/memory_pool.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/service.h" +#include "minddata/dataset/util/slice.h" +#include "minddata/dataset/util/storage_container.h" using ListOfContainers = std::vector>; namespace mindspore { diff --git a/mindspore/ccsrc/dataset/util/system_pool.h b/mindspore/ccsrc/minddata/dataset/util/system_pool.h similarity index 96% rename from mindspore/ccsrc/dataset/util/system_pool.h rename to mindspore/ccsrc/minddata/dataset/util/system_pool.h index 286e30a6158..3a7e61d16b7 100644 --- a/mindspore/ccsrc/dataset/util/system_pool.h +++ b/mindspore/ccsrc/minddata/dataset/util/system_pool.h @@ -22,8 +22,8 @@ #include #include #include "./securec.h" 
-#include "dataset/util/allocator.h" -#include "dataset/util/memory_pool.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/memory_pool.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/task.cc b/mindspore/ccsrc/minddata/dataset/util/task.cc similarity index 98% rename from mindspore/ccsrc/dataset/util/task.cc rename to mindspore/ccsrc/minddata/dataset/util/task.cc index 93db55d5f91..39d754e8060 100644 --- a/mindspore/ccsrc/dataset/util/task.cc +++ b/mindspore/ccsrc/minddata/dataset/util/task.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/task.h" +#include "minddata/dataset/util/task.h" #include "common/utils.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/task_manager.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/util/task.h b/mindspore/ccsrc/minddata/dataset/util/task.h similarity index 93% rename from mindspore/ccsrc/dataset/util/task.h rename to mindspore/ccsrc/minddata/dataset/util/task.h index 49eb16b1827..9309a3de7b5 100644 --- a/mindspore/ccsrc/dataset/util/task.h +++ b/mindspore/ccsrc/minddata/dataset/util/task.h @@ -27,11 +27,11 @@ #include #include #include -#include "dataset/util/intrp_resource.h" -#include "dataset/util/list.h" -#include "dataset/util/memory_pool.h" -#include "dataset/util/services.h" -#include "dataset/util/wait_post.h" +#include "minddata/dataset/util/intrp_resource.h" +#include "minddata/dataset/util/list.h" +#include "minddata/dataset/util/memory_pool.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/wait_post.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/util/task_manager.cc b/mindspore/ccsrc/minddata/dataset/util/task_manager.cc similarity index 99% rename from mindspore/ccsrc/dataset/util/task_manager.cc 
rename to mindspore/ccsrc/minddata/dataset/util/task_manager.cc index 3965e355642..fefea0b97ca 100644 --- a/mindspore/ccsrc/dataset/util/task_manager.cc +++ b/mindspore/ccsrc/minddata/dataset/util/task_manager.cc @@ -17,7 +17,7 @@ #include #include #include "./securec.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/task_manager.h b/mindspore/ccsrc/minddata/dataset/util/task_manager.h similarity index 94% rename from mindspore/ccsrc/dataset/util/task_manager.h rename to mindspore/ccsrc/minddata/dataset/util/task_manager.h index 5961c9000e8..3030390bab2 100644 --- a/mindspore/ccsrc/dataset/util/task_manager.h +++ b/mindspore/ccsrc/minddata/dataset/util/task_manager.h @@ -25,12 +25,12 @@ #include #include #include -#include "dataset/util/allocator.h" -#include "dataset/util/intrp_service.h" -#include "dataset/util/lock.h" -#include "dataset/util/services.h" -#include "dataset/util/status.h" -#include "dataset/util/task.h" +#include "minddata/dataset/util/allocator.h" +#include "minddata/dataset/util/intrp_service.h" +#include "minddata/dataset/util/lock.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/task.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/treap.h b/mindspore/ccsrc/minddata/dataset/util/treap.h similarity index 100% rename from mindspore/ccsrc/dataset/util/treap.h rename to mindspore/ccsrc/minddata/dataset/util/treap.h diff --git a/mindspore/ccsrc/dataset/util/wait_post.cc b/mindspore/ccsrc/minddata/dataset/util/wait_post.cc similarity index 93% rename from mindspore/ccsrc/dataset/util/wait_post.cc rename to mindspore/ccsrc/minddata/dataset/util/wait_post.cc index 204f203d9a7..944d9ca245b 100644 --- a/mindspore/ccsrc/dataset/util/wait_post.cc +++ b/mindspore/ccsrc/minddata/dataset/util/wait_post.cc @@ -13,8 +13,8 @@ 
* See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/wait_post.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/wait_post.h" +#include "minddata/dataset/util/task_manager.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/util/wait_post.h b/mindspore/ccsrc/minddata/dataset/util/wait_post.h similarity index 92% rename from mindspore/ccsrc/dataset/util/wait_post.h rename to mindspore/ccsrc/minddata/dataset/util/wait_post.h index 4e60995bd9d..afd3bea38b7 100644 --- a/mindspore/ccsrc/dataset/util/wait_post.h +++ b/mindspore/ccsrc/minddata/dataset/util/wait_post.h @@ -17,8 +17,8 @@ #define DATASET_UTIL_WAIT_POST_H_ #include -#include "dataset/util/cond_var.h" -#include "dataset/util/status.h" +#include "minddata/dataset/util/cond_var.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/mindrecord/CMakeLists.txt b/mindspore/ccsrc/minddata/mindrecord/CMakeLists.txt similarity index 100% rename from mindspore/ccsrc/mindrecord/CMakeLists.txt rename to mindspore/ccsrc/minddata/mindrecord/CMakeLists.txt diff --git a/mindspore/ccsrc/mindrecord/common/shard_error.cc b/mindspore/ccsrc/minddata/mindrecord/common/shard_error.cc similarity index 98% rename from mindspore/ccsrc/mindrecord/common/shard_error.cc rename to mindspore/ccsrc/minddata/mindrecord/common/shard_error.cc index ad68aaf92c7..e4d35b8305f 100644 --- a/mindspore/ccsrc/mindrecord/common/shard_error.cc +++ b/mindspore/ccsrc/minddata/mindrecord/common/shard_error.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_error.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/common/shard_pybind.cc b/mindspore/ccsrc/minddata/mindrecord/common/shard_pybind.cc similarity index 96% rename from mindspore/ccsrc/mindrecord/common/shard_pybind.cc rename to mindspore/ccsrc/minddata/mindrecord/common/shard_pybind.cc index ee923ebc977..d9e51efc4e9 100644 --- a/mindspore/ccsrc/mindrecord/common/shard_pybind.cc +++ b/mindspore/ccsrc/minddata/mindrecord/common/shard_pybind.cc @@ -17,12 +17,12 @@ #include #include #include "common/utils.h" -#include "mindrecord/include/common/shard_utils.h" -#include "mindrecord/include/shard_error.h" -#include "mindrecord/include/shard_index_generator.h" -#include "mindrecord/include/shard_reader.h" -#include "mindrecord/include/shard_segment.h" -#include "mindrecord/include/shard_writer.h" +#include "minddata/mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_index_generator.h" +#include "minddata/mindrecord/include/shard_reader.h" +#include "minddata/mindrecord/include/shard_segment.h" +#include "minddata/mindrecord/include/shard_writer.h" #include "nlohmann/json.hpp" #include "pybind11/pybind11.h" #include "pybind11/stl.h" diff --git a/mindspore/ccsrc/mindrecord/common/shard_utils.cc b/mindspore/ccsrc/minddata/mindrecord/common/shard_utils.cc similarity index 99% rename from mindspore/ccsrc/mindrecord/common/shard_utils.cc rename to mindspore/ccsrc/minddata/mindrecord/common/shard_utils.cc index edeabb3cdee..b5021802a04 100644 --- a/mindspore/ccsrc/mindrecord/common/shard_utils.cc +++ b/mindspore/ccsrc/minddata/mindrecord/common/shard_utils.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/common/shard_utils.h" #include "common/utils.h" #include "./securec.h" diff --git a/mindspore/ccsrc/mindrecord/include/common/shard_pybind.h b/mindspore/ccsrc/minddata/mindrecord/include/common/shard_pybind.h similarity index 95% rename from mindspore/ccsrc/mindrecord/include/common/shard_pybind.h rename to mindspore/ccsrc/minddata/mindrecord/include/common/shard_pybind.h index 86c71a0ea73..3b3698ca684 100644 --- a/mindspore/ccsrc/mindrecord/include/common/shard_pybind.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/common/shard_pybind.h @@ -19,7 +19,7 @@ #include #include -#include "mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/common/shard_utils.h" #include "pybind11/pybind11.h" namespace py = pybind11; diff --git a/mindspore/ccsrc/mindrecord/include/common/shard_utils.h b/mindspore/ccsrc/minddata/mindrecord/include/common/shard_utils.h similarity index 99% rename from mindspore/ccsrc/mindrecord/include/common/shard_utils.h rename to mindspore/ccsrc/minddata/mindrecord/include/common/shard_utils.h index 8aa5bdfbda4..bd1cda8a99c 100644 --- a/mindspore/ccsrc/mindrecord/include/common/shard_utils.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/common/shard_utils.h @@ -41,7 +41,7 @@ #include #include #include -#include "mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_error.h" #include "nlohmann/json.hpp" #include "./sqlite3.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/mindrecord/include/shard_category.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_category.h similarity index 97% rename from mindspore/ccsrc/mindrecord/include/shard_category.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_category.h index 618a91b1d8c..ed1e748afed 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_category.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_category.h @@ 
-22,7 +22,7 @@ #include #include #include -#include "mindrecord/include/shard_operator.h" +#include "minddata/mindrecord/include/shard_operator.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/include/shard_column.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_column.h similarity index 99% rename from mindspore/ccsrc/mindrecord/include/shard_column.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_column.h index 968d82e717f..f6353ed3ce5 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_column.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_column.h @@ -22,7 +22,7 @@ #include #include #include -#include "mindrecord/include/shard_header.h" +#include "minddata/mindrecord/include/shard_header.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/include/shard_distributed_sample.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_distributed_sample.h similarity index 91% rename from mindspore/ccsrc/mindrecord/include/shard_distributed_sample.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_distributed_sample.h index ef0ad738c4c..f166ec1e6c6 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_distributed_sample.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_distributed_sample.h @@ -21,9 +21,9 @@ #include #include #include -#include "mindrecord/include/shard_operator.h" -#include "mindrecord/include/shard_shuffle.h" -#include "mindrecord/include/shard_sample.h" +#include "minddata/mindrecord/include/shard_operator.h" +#include "minddata/mindrecord/include/shard_shuffle.h" +#include "minddata/mindrecord/include/shard_sample.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/include/shard_error.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_error.h similarity index 100% rename from mindspore/ccsrc/mindrecord/include/shard_error.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_error.h 
diff --git a/mindspore/ccsrc/mindrecord/include/shard_header.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_header.h similarity index 94% rename from mindspore/ccsrc/mindrecord/include/shard_header.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_header.h index e4361c466a8..67169e8696a 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_header.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_header.h @@ -22,12 +22,12 @@ #include #include #include -#include "mindrecord/include/common/shard_utils.h" -#include "mindrecord/include/shard_error.h" -#include "mindrecord/include/shard_index.h" -#include "mindrecord/include/shard_page.h" -#include "mindrecord/include/shard_schema.h" -#include "mindrecord/include/shard_statistics.h" +#include "minddata/mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_index.h" +#include "minddata/mindrecord/include/shard_page.h" +#include "minddata/mindrecord/include/shard_schema.h" +#include "minddata/mindrecord/include/shard_statistics.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/include/shard_index.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_index.h similarity index 90% rename from mindspore/ccsrc/mindrecord/include/shard_index.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_index.h index d430c5bdcf9..79b10893fbd 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_index.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_index.h @@ -24,9 +24,9 @@ #include #include #include -#include "mindrecord/include/common/shard_utils.h" -#include "mindrecord/include/shard_error.h" -#include "mindrecord/include/shard_schema.h" +#include "minddata/mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_schema.h" #include "utils/log_adapter.h" namespace mindspore { diff --git 
a/mindspore/ccsrc/mindrecord/include/shard_index_generator.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_index_generator.h similarity index 98% rename from mindspore/ccsrc/mindrecord/include/shard_index_generator.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_index_generator.h index b081b7a0a01..fb85d9adbcb 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_index_generator.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_index_generator.h @@ -25,7 +25,7 @@ #include #include #include -#include "mindrecord/include/shard_header.h" +#include "minddata/mindrecord/include/shard_header.h" #include "./sqlite3.h" namespace mindspore { diff --git a/mindspore/ccsrc/mindrecord/include/shard_operator.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_operator.h similarity index 97% rename from mindspore/ccsrc/mindrecord/include/shard_operator.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_operator.h index f33e3db5f4b..b5ea53b759a 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_operator.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_operator.h @@ -18,7 +18,7 @@ #define MINDRECORD_INCLUDE_SHARD_OPERATOR_H_ #include -#include "mindrecord/include/shard_task.h" +#include "minddata/mindrecord/include/shard_task.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/include/shard_page.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_page.h similarity index 98% rename from mindspore/ccsrc/mindrecord/include/shard_page.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_page.h index c22acd8d2c3..01c70acf297 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_page.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_page.h @@ -23,7 +23,7 @@ #include #include #include -#include "mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/common/shard_utils.h" #include "pybind11/pybind11.h" #include "utils/log_adapter.h" diff --git 
a/mindspore/ccsrc/mindrecord/include/shard_pk_sample.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_pk_sample.h similarity index 89% rename from mindspore/ccsrc/mindrecord/include/shard_pk_sample.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_pk_sample.h index 4f1a1c307a0..2d420b563dd 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_pk_sample.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_pk_sample.h @@ -21,9 +21,9 @@ #include #include #include -#include "mindrecord/include/shard_operator.h" -#include "mindrecord/include/shard_shuffle.h" -#include "mindrecord/include/shard_category.h" +#include "minddata/mindrecord/include/shard_operator.h" +#include "minddata/mindrecord/include/shard_shuffle.h" +#include "minddata/mindrecord/include/shard_category.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/include/shard_reader.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_reader.h similarity index 96% rename from mindspore/ccsrc/mindrecord/include/shard_reader.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_reader.h index 1f2138d6d5e..b1b0c1397a3 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_reader.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_reader.h @@ -42,16 +42,16 @@ #include #include #include -#include "mindrecord/include/common/shard_utils.h" -#include "mindrecord/include/shard_category.h" -#include "mindrecord/include/shard_column.h" -#include "mindrecord/include/shard_distributed_sample.h" -#include "mindrecord/include/shard_error.h" -#include "mindrecord/include/shard_index_generator.h" -#include "mindrecord/include/shard_operator.h" -#include "mindrecord/include/shard_reader.h" -#include "mindrecord/include/shard_sample.h" -#include "mindrecord/include/shard_shuffle.h" +#include "minddata/mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/shard_category.h" +#include "minddata/mindrecord/include/shard_column.h" 
+#include "minddata/mindrecord/include/shard_distributed_sample.h" +#include "minddata/mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_index_generator.h" +#include "minddata/mindrecord/include/shard_operator.h" +#include "minddata/mindrecord/include/shard_reader.h" +#include "minddata/mindrecord/include/shard_sample.h" +#include "minddata/mindrecord/include/shard_shuffle.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/mindrecord/include/shard_sample.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_sample.h similarity index 93% rename from mindspore/ccsrc/mindrecord/include/shard_sample.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_sample.h index a32acbff6ec..ce813bc4bf4 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_sample.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_sample.h @@ -21,8 +21,8 @@ #include #include #include -#include "mindrecord/include/shard_operator.h" -#include "mindrecord/include/shard_shuffle.h" +#include "minddata/mindrecord/include/shard_operator.h" +#include "minddata/mindrecord/include/shard_shuffle.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/include/shard_schema.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_schema.h similarity index 94% rename from mindspore/ccsrc/mindrecord/include/shard_schema.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_schema.h index 4ef134bde20..56eae85e5a1 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_schema.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_schema.h @@ -22,9 +22,9 @@ #include #include #include -#include "mindrecord/include/common/shard_pybind.h" -#include "mindrecord/include/common/shard_utils.h" -#include "mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/common/shard_pybind.h" +#include "minddata/mindrecord/include/common/shard_utils.h" +#include 
"minddata/mindrecord/include/shard_error.h" #include "pybind11/pybind11.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/mindrecord/include/shard_segment.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_segment.h similarity index 98% rename from mindspore/ccsrc/mindrecord/include/shard_segment.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_segment.h index 12497a5acef..45d9bda3381 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_segment.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_segment.h @@ -21,7 +21,7 @@ #include #include #include -#include "mindrecord/include/shard_reader.h" +#include "minddata/mindrecord/include/shard_reader.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/include/shard_sequential_sample.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_sequential_sample.h similarity index 96% rename from mindspore/ccsrc/mindrecord/include/shard_sequential_sample.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_sequential_sample.h index a8ee3a36db8..724be9acaf4 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_sequential_sample.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_sequential_sample.h @@ -21,7 +21,7 @@ #include #include #include -#include "mindrecord/include/shard_sample.h" +#include "minddata/mindrecord/include/shard_sample.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/include/shard_shuffle.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_shuffle.h similarity index 96% rename from mindspore/ccsrc/mindrecord/include/shard_shuffle.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_shuffle.h index adb172bdcc4..d7f736b55ba 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_shuffle.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_shuffle.h @@ -18,7 +18,7 @@ #define MINDRECORD_INCLUDE_SHARD_SHUFFLE_H_ #include -#include "mindrecord/include/shard_operator.h" +#include 
"minddata/mindrecord/include/shard_operator.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/include/shard_statistics.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_statistics.h similarity index 93% rename from mindspore/ccsrc/mindrecord/include/shard_statistics.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_statistics.h index 7fc2f968cdf..f100bb9833f 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_statistics.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_statistics.h @@ -24,9 +24,9 @@ #include #include -#include "mindrecord/include/common/shard_pybind.h" -#include "mindrecord/include/common/shard_utils.h" -#include "mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/common/shard_pybind.h" +#include "minddata/mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/shard_error.h" #include "pybind11/pybind11.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/mindrecord/include/shard_task.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_task.h similarity index 96% rename from mindspore/ccsrc/mindrecord/include/shard_task.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_task.h index 4a12eb9e453..f07da656f2c 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_task.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_task.h @@ -22,7 +22,7 @@ #include #include #include -#include "mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/common/shard_utils.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/include/shard_writer.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_writer.h similarity index 97% rename from mindspore/ccsrc/mindrecord/include/shard_writer.h rename to mindspore/ccsrc/minddata/mindrecord/include/shard_writer.h index 6175180c927..833928773e6 100644 --- a/mindspore/ccsrc/mindrecord/include/shard_writer.h +++ 
b/mindspore/ccsrc/minddata/mindrecord/include/shard_writer.h @@ -35,11 +35,11 @@ #include #include #include -#include "mindrecord/include/common/shard_utils.h" -#include "mindrecord/include/shard_column.h" -#include "mindrecord/include/shard_error.h" -#include "mindrecord/include/shard_header.h" -#include "mindrecord/include/shard_index.h" +#include "minddata/mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/shard_column.h" +#include "minddata/mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_header.h" +#include "minddata/mindrecord/include/shard_index.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/mindrecord/io/shard_index_generator.cc b/mindspore/ccsrc/minddata/mindrecord/io/shard_index_generator.cc similarity index 99% rename from mindspore/ccsrc/mindrecord/io/shard_index_generator.cc rename to mindspore/ccsrc/minddata/mindrecord/io/shard_index_generator.cc index 16c730bd4cd..f9b18a3bf02 100644 --- a/mindspore/ccsrc/mindrecord/io/shard_index_generator.cc +++ b/mindspore/ccsrc/minddata/mindrecord/io/shard_index_generator.cc @@ -15,7 +15,7 @@ */ #include -#include "mindrecord/include/shard_index_generator.h" +#include "minddata/mindrecord/include/shard_index_generator.h" #include "common/utils.h" using mindspore::LogStream; diff --git a/mindspore/ccsrc/mindrecord/io/shard_reader.cc b/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc similarity index 99% rename from mindspore/ccsrc/mindrecord/io/shard_reader.cc rename to mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc index 99fa0c447d2..84d7fddb6f1 100644 --- a/mindspore/ccsrc/mindrecord/io/shard_reader.cc +++ b/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "mindrecord/include/shard_distributed_sample.h" -#include "mindrecord/include/shard_reader.h" +#include "minddata/mindrecord/include/shard_distributed_sample.h" +#include "minddata/mindrecord/include/shard_reader.h" #include "common/utils.h" using mindspore::LogStream; diff --git a/mindspore/ccsrc/mindrecord/io/shard_segment.cc b/mindspore/ccsrc/minddata/mindrecord/io/shard_segment.cc similarity index 99% rename from mindspore/ccsrc/mindrecord/io/shard_segment.cc rename to mindspore/ccsrc/minddata/mindrecord/io/shard_segment.cc index fb1120b1784..eda8924e131 100644 --- a/mindspore/ccsrc/mindrecord/io/shard_segment.cc +++ b/mindspore/ccsrc/minddata/mindrecord/io/shard_segment.cc @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "mindrecord/include/shard_segment.h" +#include "minddata/mindrecord/include/shard_segment.h" #include "common/utils.h" #include "./securec.h" -#include "mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/common/shard_utils.h" #include "pybind11/pybind11.h" using mindspore::LogStream; diff --git a/mindspore/ccsrc/mindrecord/io/shard_writer.cc b/mindspore/ccsrc/minddata/mindrecord/io/shard_writer.cc similarity index 99% rename from mindspore/ccsrc/mindrecord/io/shard_writer.cc rename to mindspore/ccsrc/minddata/mindrecord/io/shard_writer.cc index 913caab550a..e85229cc34e 100644 --- a/mindspore/ccsrc/mindrecord/io/shard_writer.cc +++ b/mindspore/ccsrc/minddata/mindrecord/io/shard_writer.cc @@ -14,9 +14,9 @@ * limitations under the License. 
*/ -#include "mindrecord/include/shard_writer.h" +#include "minddata/mindrecord/include/shard_writer.h" #include "common/utils.h" -#include "mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/common/shard_utils.h" #include "./securec.h" using mindspore::LogStream; diff --git a/mindspore/ccsrc/mindrecord/meta/shard_category.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_category.cc similarity index 96% rename from mindspore/ccsrc/mindrecord/meta/shard_category.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_category.cc index bd427a330a7..eb1428a2ade 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_category.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_category.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "mindrecord/include/shard_category.h" +#include "minddata/mindrecord/include/shard_category.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/meta/shard_column.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_column.cc similarity index 99% rename from mindspore/ccsrc/mindrecord/meta/shard_column.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_column.cc index 28dc243e172..4cc5e9f413a 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_column.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_column.cc @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#include "mindrecord/include/shard_column.h" +#include "minddata/mindrecord/include/shard_column.h" #include "common/utils.h" -#include "mindrecord/include/common/shard_utils.h" -#include "mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/shard_error.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/meta/shard_distributed_sample.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_distributed_sample.cc similarity index 97% rename from mindspore/ccsrc/mindrecord/meta/shard_distributed_sample.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_distributed_sample.cc index b7e890da7ce..4c7abbb4b48 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_distributed_sample.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_distributed_sample.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "mindrecord/include/shard_distributed_sample.h" +#include "minddata/mindrecord/include/shard_distributed_sample.h" using mindspore::LogStream; using mindspore::ExceptionType::NoExceptionType; diff --git a/mindspore/ccsrc/mindrecord/meta/shard_header.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_header.cc similarity index 99% rename from mindspore/ccsrc/mindrecord/meta/shard_header.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_header.cc index ec177394ef3..500037399b0 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_header.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_header.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "mindrecord/include/shard_header.h" +#include "minddata/mindrecord/include/shard_header.h" #include #include @@ -23,8 +23,8 @@ #include #include "common/utils.h" -#include "mindrecord/include/shard_error.h" -#include "mindrecord/include/shard_page.h" +#include "minddata/mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_page.h" using mindspore::LogStream; using mindspore::ExceptionType::NoExceptionType; diff --git a/mindspore/ccsrc/mindrecord/meta/shard_index.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_index.cc similarity index 95% rename from mindspore/ccsrc/mindrecord/meta/shard_index.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_index.cc index 8b7a3c03420..73397b5bba0 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_index.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_index.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "mindrecord/include/shard_index.h" +#include "minddata/mindrecord/include/shard_index.h" namespace mindspore { namespace mindrecord { diff --git a/mindspore/ccsrc/mindrecord/meta/shard_page.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_page.cc similarity index 96% rename from mindspore/ccsrc/mindrecord/meta/shard_page.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_page.cc index 6bb849ae1db..ba2292415f6 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_page.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_page.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "mindrecord/include/shard_page.h" +#include "minddata/mindrecord/include/shard_page.h" #include "pybind11/pybind11.h" namespace mindspore { diff --git a/mindspore/ccsrc/mindrecord/meta/shard_pk_sample.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_pk_sample.cc similarity index 96% rename from mindspore/ccsrc/mindrecord/meta/shard_pk_sample.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_pk_sample.cc index fac2fec708b..081a48352de 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_pk_sample.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_pk_sample.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "mindrecord/include/shard_pk_sample.h" +#include "minddata/mindrecord/include/shard_pk_sample.h" using mindspore::LogStream; using mindspore::ExceptionType::NoExceptionType; diff --git a/mindspore/ccsrc/mindrecord/meta/shard_sample.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_sample.cc similarity index 98% rename from mindspore/ccsrc/mindrecord/meta/shard_sample.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_sample.cc index c207747194a..808ab55bfbe 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_sample.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_sample.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "mindrecord/include/shard_sample.h" +#include "minddata/mindrecord/include/shard_sample.h" using mindspore::LogStream; using mindspore::ExceptionType::NoExceptionType; diff --git a/mindspore/ccsrc/mindrecord/meta/shard_schema.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_schema.cc similarity index 98% rename from mindspore/ccsrc/mindrecord/meta/shard_schema.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_schema.cc index ee0f5afa4ae..093be9792f1 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_schema.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_schema.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "mindrecord/include/shard_schema.h" +#include "minddata/mindrecord/include/shard_schema.h" #include "common/utils.h" using mindspore::LogStream; diff --git a/mindspore/ccsrc/mindrecord/meta/shard_sequential_sample.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_sequential_sample.cc similarity index 97% rename from mindspore/ccsrc/mindrecord/meta/shard_sequential_sample.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_sequential_sample.cc index a7fa4e73433..3aa695e03b4 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_sequential_sample.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_sequential_sample.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "mindrecord/include/shard_sequential_sample.h" +#include "minddata/mindrecord/include/shard_sequential_sample.h" using mindspore::LogStream; using mindspore::ExceptionType::NoExceptionType; diff --git a/mindspore/ccsrc/mindrecord/meta/shard_shuffle.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_shuffle.cc similarity index 98% rename from mindspore/ccsrc/mindrecord/meta/shard_shuffle.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_shuffle.cc index 5cf49b04f0b..7743cabea30 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_shuffle.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_shuffle.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "mindrecord/include/shard_shuffle.h" +#include "minddata/mindrecord/include/shard_shuffle.h" #include diff --git a/mindspore/ccsrc/mindrecord/meta/shard_statistics.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_statistics.cc similarity index 98% rename from mindspore/ccsrc/mindrecord/meta/shard_statistics.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_statistics.cc index ca36c50863f..7024a2ab061 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_statistics.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_statistics.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "mindrecord/include/shard_statistics.h" +#include "minddata/mindrecord/include/shard_statistics.h" #include "pybind11/pybind11.h" using mindspore::LogStream; diff --git a/mindspore/ccsrc/mindrecord/meta/shard_task.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_task.cc similarity index 97% rename from mindspore/ccsrc/mindrecord/meta/shard_task.cc rename to mindspore/ccsrc/minddata/mindrecord/meta/shard_task.cc index 8baa3c26cdb..6f8e440f91c 100644 --- a/mindspore/ccsrc/mindrecord/meta/shard_task.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_task.cc @@ -14,9 +14,9 @@ * limitations under the License. */ -#include "mindrecord/include/shard_task.h" +#include "minddata/mindrecord/include/shard_task.h" #include "common/utils.h" -#include "mindrecord/include/common/shard_utils.h" +#include "minddata/mindrecord/include/common/shard_utils.h" using mindspore::LogStream; using mindspore::ExceptionType::NoExceptionType; diff --git a/mindspore/ccsrc/parallel/ops_info/ops_info_head_files.h b/mindspore/ccsrc/parallel/ops_info/ops_info_head_files.h deleted file mode 100644 index 45b00aed30e..00000000000 --- a/mindspore/ccsrc/parallel/ops_info/ops_info_head_files.h +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef MINDSPORE_CCSRC_PARALLEL_OPS_INFO_OPS_INFO_HEAD_FILES_H_ -#define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_OPS_INFO_HEAD_FILES_H_ - -#include "parallel/ops_info/activation_info.h" -#include "parallel/ops_info/arithmetic_info.h" -#include "parallel/ops_info/batch_parallel_info.h" -#include "parallel/ops_info/bias_add_info.h" -#include "parallel/ops_info/comparison_function_info.h" -#include "parallel/ops_info/dropout_do_mask_info.h" -#include "parallel/ops_info/elementary_function_info.h" -#include "parallel/ops_info/gather_v2_info.h" -#include "parallel/ops_info/get_next_info.h" -#include "parallel/ops_info/l2_normalize_info.h" -#include "parallel/ops_info/layer_norm_info.h" -#include "parallel/ops_info/loss_info.h" -#include "parallel/ops_info/matmul_info.h" -#include "parallel/ops_info/onehot_info.h" -#include "parallel/ops_info/prelu_info.h" -#include "parallel/ops_info/reduce_method_info.h" -#include "parallel/ops_info/reshape_info.h" -#include "parallel/ops_info/transpose_info.h" -#include "parallel/ops_info/virtual_dataset_info.h" -#include "parallel/ops_info/gather_v2_p_info.h" - -#endif // MINDSPORE_CCSRC_PARALLEL_OPS_INFO_HEAD_FILES_H_ diff --git a/mindspore/ccsrc/pipeline/CMakeLists.txt b/mindspore/ccsrc/pipeline/jit/CMakeLists.txt similarity index 90% rename from mindspore/ccsrc/pipeline/CMakeLists.txt rename to mindspore/ccsrc/pipeline/jit/CMakeLists.txt index 39664d717dd..6188546ce52 100644 --- a/mindspore/ccsrc/pipeline/CMakeLists.txt +++ b/mindspore/ccsrc/pipeline/jit/CMakeLists.txt @@ -24,4 +24,4 @@ if (ENABLE_GE OR ENABLE_D) list(APPEND _PIPELINE_SRC_FILES ${_PIPELINE_GE_SRC_FILES}) endif () -add_library(_mindspore_pipeline_obj OBJECT ${_PIPELINE_SRC_FILES}) +add_library(_mindspore_pipeline_jit_obj OBJECT ${_PIPELINE_SRC_FILES}) diff --git a/mindspore/ccsrc/pipeline/action.cc b/mindspore/ccsrc/pipeline/jit/action.cc similarity index 93% rename from mindspore/ccsrc/pipeline/action.cc rename to mindspore/ccsrc/pipeline/jit/action.cc index 
89598ae85d4..74eb9f3f9b5 100644 --- a/mindspore/ccsrc/pipeline/action.cc +++ b/mindspore/ccsrc/pipeline/jit/action.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pipeline/action.h" +#include "pipeline/jit/action.h" #include #include @@ -24,22 +24,22 @@ #include #include "ir/func_graph_cloner.h" -#include "ir/param_value_py.h" -#include "parallel/costmodel_context.h" -#include "parallel/context.h" -#include "pipeline/pass.h" -#include "pipeline/parse/parse_base.h" -#include "pipeline/parse/data_converter.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pipeline/static_analysis/static_analysis.h" -#include "pipeline/static_analysis/program_specialize.h" -#include "pipeline/resource.h" +#include "ir/param_value.h" +#include "frontend/parallel/costmodel_context.h" +#include "frontend/parallel/context.h" +#include "pipeline/jit/pass.h" +#include "pipeline/jit/parse/parse_base.h" +#include "pipeline/jit/parse/data_converter.h" +#include "abstract/abstract_value.h" +#include "pipeline/jit/static_analysis/static_analysis.h" +#include "pipeline/jit/static_analysis/program_specialize.h" +#include "pipeline/jit/resource.h" #include "utils/context/ms_context.h" -#include "pipeline/remove_value_node_dup.h" -#include "optimizer/optimizer.h" +#include "pipeline/jit/remove_value_node_dup.h" +#include "frontend/optimizer/optimizer.h" #include "vm/transform.h" #include "parse/python_adapter.h" -#include "optimizer/py_pass_manager.h" +#include "frontend/optimizer/py_pass_manager.h" namespace mindspore { namespace pipeline { @@ -228,14 +228,10 @@ bool AbstractSpecializeAction(const ResourcePtr &res) { for (const auto ¶m : func_graph->parameters()) { auto param_node = std::static_pointer_cast(param); if (param_node->has_default()) { - auto param_value = std::dynamic_pointer_cast(param_node->default_param()); - AbstractBasePtr ptr = abstract::FromValue(parse::data_converter::PyDataToValue(param_value->value()), true); - auto sparse_grad = - 
py::cast(parse::python_adapter::GetPyObjAttr(param_value->value(), "sparse_grad")); - ptr->set_sparse_grad(sparse_grad); - auto has_indexed_slices_grad = - py::cast(parse::python_adapter::GetPyObjAttr(param_value->value(), "has_indexed_slices_grad")); - ptr->set_has_indexed_slices_grad(has_indexed_slices_grad); + const auto ¶m_value = param_node->default_param(); + ValuePtr value = param_value->value(); + constexpr bool broaden = true; + AbstractBasePtr ptr = abstract::FromValue(value, broaden); parallel::ParallelParameterContextRestoreInNoTraining(func_graph, param_node, ptr); args_spec.push_back(ptr); @@ -439,7 +435,7 @@ bool ResolveActionPyStub(const ResourcePtr &res) { } bool OptActionPyStub(const ResourcePtr &res) { - ActionPyStub(res, opt::python_pass::Phase::RESOLVE); + ActionPyStub(res, opt::python_pass::Phase::OPT); return true; } diff --git a/mindspore/ccsrc/pipeline/action.h b/mindspore/ccsrc/pipeline/jit/action.h similarity index 98% rename from mindspore/ccsrc/pipeline/action.h rename to mindspore/ccsrc/pipeline/jit/action.h index eed13078723..0a1feab1c9f 100644 --- a/mindspore/ccsrc/pipeline/action.h +++ b/mindspore/ccsrc/pipeline/jit/action.h @@ -21,7 +21,7 @@ #include #include #include -#include "pipeline/resource.h" +#include "pipeline/jit/resource.h" #include "vm/segment_runner.h" namespace mindspore { diff --git a/mindspore/ccsrc/pipeline/base.h b/mindspore/ccsrc/pipeline/jit/base.h similarity index 98% rename from mindspore/ccsrc/pipeline/base.h rename to mindspore/ccsrc/pipeline/jit/base.h index 57edea03a20..0a8a2b75f3d 100644 --- a/mindspore/ccsrc/pipeline/base.h +++ b/mindspore/ccsrc/pipeline/jit/base.h @@ -23,7 +23,7 @@ #include #include "ir/anf.h" -#include "pipeline/resource.h" +#include "pipeline/jit/resource.h" #include "utils/context/ms_context.h" namespace mindspore { diff --git a/mindspore/ccsrc/pipeline/init.cc b/mindspore/ccsrc/pipeline/jit/init.cc similarity index 95% rename from mindspore/ccsrc/pipeline/init.cc rename to 
mindspore/ccsrc/pipeline/jit/init.cc index f28be181ddd..65adebb6e25 100644 --- a/mindspore/ccsrc/pipeline/init.cc +++ b/mindspore/ccsrc/pipeline/jit/init.cc @@ -16,29 +16,28 @@ #include #include -#include "kernel/oplib/oplib.h" -#include "kernel/oplib/oploader.h" -#include "pipeline/pipeline.h" -#include "operator/composite/composite.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/kernel_compiler/oplib/oploader.h" +#include "pipeline/jit/pipeline.h" +#include "frontend/operator/composite/composite.h" #include "ir/signature.h" -#include "pynative/pynative_execute.h" +#include "pipeline/pynative/pynative_execute.h" #include "utils/symbolic.h" #include "pybind_api/api_register.h" -#include "pipeline/parse/python_adapter.h" +#include "pipeline/jit/parse/python_adapter.h" #include "utils/summary/event_writer.h" #include "utils/config_manager.h" #include "utils/mpi/mpi_config.h" -#include "parallel/context.h" -#include "parallel/device_manager.h" -#include "parallel/costmodel_context.h" +#include "frontend/parallel/context.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/costmodel_context.h" #ifdef ENABLE_GPU_COLLECTIVE -#include "device/gpu/distribution/collective_init.h" +#include "runtime/device/gpu/distribution/collective_init.h" #else -#include "device/gpu/distribution/collective_fake_init.h" +#include "runtime/device/gpu/distribution/collective_fake_init.h" #endif namespace py = pybind11; -using FuncGraph = mindspore::FuncGraph; using EnvInstance = mindspore::EnvInstance; using ExecutorPy = mindspore::pipeline::ExecutorPy; using Pipeline = mindspore::pipeline::Pipeline; @@ -54,10 +53,6 @@ using CostModelContext = mindspore::parallel::CostModelContext; PYBIND11_MODULE(_c_expression, m) { m.doc() = "MindSpore c plugin"; - (void)py::class_>(*m, "MetaFuncGraph_") - .def_readonly(mindspore::PYTHON_METAFUNCGRAPH_FLAG, &mindspore::MetaFuncGraph::parse_info_) - .def(py::init()); - auto fns = 
mindspore::PybindDefineRegister::AllFuncs(); for (auto &item : fns) { item.second(&m); @@ -85,8 +80,6 @@ PYBIND11_MODULE(_c_expression, m) { py::arg("broadcast_params") = py::dict(), "Build data graph.") .def("has_compiled", &ExecutorPy::HasCompiled, py::arg("phase") = py::str(""), "get if cell compiled.") .def("run_init_graph", &ExecutorPy::RunInitGraph, "Run init Graph."); - // Class Graph interface - (void)py::class_(m, "FuncGraph").def(py::init()); (void)py::class_>(m, "EnvInstance_") .def_readonly(mindspore::PYTHON_ENVINSTANCE_FLAG, &mindspore::EnvInstance::parse_info_) @@ -155,8 +148,8 @@ PYBIND11_MODULE(_c_expression, m) { .def("set_enable_graph_kernel", &mindspore::MsContext::set_enable_graph_kernel, "Set the GraphKernel switch to on or off.") .def("get_enable_graph_kernel", &mindspore::MsContext::enable_graph_kernel, "Get the value of GraphKernel switch.") - .def("get_enable_sparse_flag", &mindspore::MsContext::enable_sparse_flag, "Get whether to enable sparse.") - .def("set_enable_sparse_flag", &mindspore::MsContext::set_enable_sparse_flag, "Set whether to enable sparse."); + .def("get_enable_sparse", &mindspore::MsContext::enable_sparse, "Get whether to enable sparsity.") + .def("set_enable_sparse", &mindspore::MsContext::set_enable_sparse, "Set whether to enable sparsity."); (void)py::class_>(m, "MpiConfig") .def_static("get_instance", &mindspore::MpiConfig::GetInstance, "Get mpi config instance.") @@ -323,7 +316,7 @@ PYBIND11_MODULE(_c_expression, m) { (void)py::class_>(m, "Oplib") .def(py::init()) - .def("reg_op", &OpLib::RegOp, "Register op info."); + .def_static("reg_op", &OpLib::RegOp, "Register op info."); #ifdef ENABLE_GPU_COLLECTIVE (void)m.def("init_gpu_collective", &mindspore::device::gpu::CollectiveInitializer::InitCollective, "Init gpu collective communication mode."); diff --git a/mindspore/ccsrc/pipeline/parse/data_converter.cc b/mindspore/ccsrc/pipeline/jit/parse/data_converter.cc similarity index 98% rename from 
mindspore/ccsrc/pipeline/parse/data_converter.cc rename to mindspore/ccsrc/pipeline/jit/parse/data_converter.cc index 330d03d11ca..baef64481ba 100644 --- a/mindspore/ccsrc/pipeline/parse/data_converter.cc +++ b/mindspore/ccsrc/pipeline/jit/parse/data_converter.cc @@ -16,7 +16,7 @@ * limitations under the License. */ -#include "pipeline/parse/data_converter.h" +#include "pipeline/jit/parse/data_converter.h" #include #include #include @@ -24,15 +24,15 @@ #include #include #include -#include "pipeline/parse/resolve.h" -#include "pipeline/parse/python_adapter.h" -#include "operator/ops.h" -#include "operator/composite/composite.h" +#include "pipeline/jit/parse/resolve.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "frontend/operator/ops.h" +#include "frontend/operator/composite/composite.h" #include "ir/func_graph_cloner.h" #include "utils/symbolic.h" #include "utils/context/ms_context.h" #include "debug/trace.h" -#include "optimizer/ad/grad.h" +#include "frontend/optimizer/ad/grad.h" namespace mindspore { namespace parse { diff --git a/mindspore/ccsrc/pipeline/parse/data_converter.h b/mindspore/ccsrc/pipeline/jit/parse/data_converter.h similarity index 95% rename from mindspore/ccsrc/pipeline/parse/data_converter.h rename to mindspore/ccsrc/pipeline/jit/parse/data_converter.h index 0165b553631..6632d4801e8 100644 --- a/mindspore/ccsrc/pipeline/parse/data_converter.h +++ b/mindspore/ccsrc/pipeline/jit/parse/data_converter.h @@ -24,8 +24,8 @@ #include #include #include -#include "pipeline/parse/parse_base.h" -#include "pipeline/parse/python_adapter.h" +#include "pipeline/jit/parse/parse_base.h" +#include "pipeline/jit/parse/python_adapter.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/pipeline/parse/function_block.cc b/mindspore/ccsrc/pipeline/jit/parse/function_block.cc similarity index 96% rename from mindspore/ccsrc/pipeline/parse/function_block.cc rename to mindspore/ccsrc/pipeline/jit/parse/function_block.cc index 
fbeeba94a1d..b52dddda66f 100644 --- a/mindspore/ccsrc/pipeline/parse/function_block.cc +++ b/mindspore/ccsrc/pipeline/jit/parse/function_block.cc @@ -16,13 +16,13 @@ * limitations under the License. */ -#include "pipeline/parse/function_block.h" +#include "pipeline/jit/parse/function_block.h" #include #include #include -#include "pipeline/parse/resolve.h" -#include "pipeline/parse/parse.h" -#include "operator/ops.h" +#include "pipeline/jit/parse/resolve.h" +#include "pipeline/jit/parse/parse.h" +#include "frontend/operator/ops.h" #include "debug/info.h" #include "debug/trace.h" #include "pybind11/pybind11.h" @@ -294,13 +294,18 @@ void FunctionBlock::Jump(const FunctionBlockPtr &target_block, AnfNodePtr node) // Perform a conditional jump using switch operation. // The first CNode select graph with condition, and than execute this graph void FunctionBlock::ConditionalJump(AnfNodePtr condNode, const FunctionBlockPtr &true_block, - const FunctionBlockPtr &false_block) { + const FunctionBlockPtr &false_block, bool unroll_loop) { if (func_graph()->get_return() != nullptr) { MS_LOG(EXCEPTION) << "Failure: have return node! 
NodeInfo: " << trace::GetDebugInfo(func_graph()->get_return()->debug_info()); } + // Here we need set an attribute to primtive 'switch', so we create a new variable instead of global 'kPrimSwitch' + auto prim_switch = std::make_shared(prim::kPrimSwitch->name()); + if (!unroll_loop) { + prim_switch->AddAttr(prim::SWITCH_UNROLL_FLAG, MakeValue(0)); + } CNodePtr switch_app = - func_graph()->NewCNode({NewValueNode(prim::kPrimSwitch), condNode, NewValueNode(true_block->func_graph()), + func_graph()->NewCNode({NewValueNode(prim_switch), condNode, NewValueNode(true_block->func_graph()), NewValueNode(false_block->func_graph())}); CNodePtr switch_app_new = func_graph()->NewCNode({switch_app}); func_graph()->set_output(switch_app_new); diff --git a/mindspore/ccsrc/pipeline/parse/function_block.h b/mindspore/ccsrc/pipeline/jit/parse/function_block.h similarity index 96% rename from mindspore/ccsrc/pipeline/parse/function_block.h rename to mindspore/ccsrc/pipeline/jit/parse/function_block.h index 346061430dc..cbf75a3dd84 100644 --- a/mindspore/ccsrc/pipeline/parse/function_block.h +++ b/mindspore/ccsrc/pipeline/jit/parse/function_block.h @@ -26,7 +26,7 @@ #include #include #include -#include "pipeline/parse/parse_base.h" +#include "pipeline/jit/parse/parse_base.h" #include "utils/log_adapter.h" #include "utils/ordered_map.h" @@ -59,7 +59,8 @@ class FunctionBlock : public std::enable_shared_from_this { CNodePtr ForceToWhileCond(const AnfNodePtr &cond); void Jump(const FunctionBlockPtr &block, AnfNodePtr node); AnfNodePtr SearchReplaceNode(const std::string &var, const ParameterPtr &phi); - void ConditionalJump(AnfNodePtr condNode, const FunctionBlockPtr &trueBlock, const FunctionBlockPtr &falseBlock); + void ConditionalJump(AnfNodePtr condNode, const FunctionBlockPtr &trueBlock, const FunctionBlockPtr &falseBlock, + bool unroll_loop = true); // record the assign statement of self.xx weight parameter ,which will use state_setitem op void SetStateAssgin(const AnfNodePtr &target, 
const std::string &readid); void AddAutoDepend(const AnfNodePtr &target); diff --git a/mindspore/ccsrc/pipeline/parse/parse.cc b/mindspore/ccsrc/pipeline/jit/parse/parse.cc similarity index 91% rename from mindspore/ccsrc/pipeline/parse/parse.cc rename to mindspore/ccsrc/pipeline/jit/parse/parse.cc index 77e865cee93..edc9a66594b 100644 --- a/mindspore/ccsrc/pipeline/parse/parse.cc +++ b/mindspore/ccsrc/pipeline/jit/parse/parse.cc @@ -16,15 +16,15 @@ * limitations under the License. */ -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include #include #include #include #include -#include "operator/ops.h" -#include "pipeline/parse/data_converter.h" -#include "operator/composite/composite.h" +#include "frontend/operator/ops.h" +#include "pipeline/jit/parse/data_converter.h" +#include "frontend/operator/composite/composite.h" #include "utils/context/ms_context.h" #include "debug/trace.h" @@ -1002,6 +1002,7 @@ CNodePtr Parser::GenerateIteratorInFor(const FunctionBlockPtr &block, const py:: AnfNodePtr iter_anf_node = ParseExprNode(block, iter_node); return block->func_graph()->NewCNode({op_iter, iter_anf_node}); } + CNodePtr Parser::GenerateCondInFor(const ParameterPtr &iter_param, const FunctionBlockPtr &header_block, const AnfNodePtr &op_hasnext) { MS_EXCEPTION_IF_NULL(header_block); @@ -1018,12 +1019,57 @@ FunctionBlockPtr Parser::GenerateBlockInFor(const TraceInfoPtr &trace_info) { // A for loop will generate 3 functions :the test, the body, and the continuation // for x in xs: // body -// it compiled to be following statement +// it is compiled to be following statement +// if len(xs) < max_loop_cnt: +// ParseForIter() // use iter to implement for loop, which always unroll loop +// else: +// ParseForLoop() // use loop var to implement for loop, which always sink loop +FunctionBlockPtr Parser::ParseFor(const FunctionBlockPtr &block, const py::object &node) { + MS_LOG(DEBUG) << "Process ast For, create an if else statement"; + 
MS_EXCEPTION_IF_NULL(block); + // create statement 'len(xs) < prim::MAX_FOR_LOOP_COUNT' + AnfNodePtr op_len = block->MakeResolveSymbol(NAMED_PRIMITIVE_LEN); + py::object iter_obj = python_adapter::GetPyObjAttr(node, NAMED_PRIMITIVE_ITER); + AnfNodePtr iter_node = ParseExprNode(block, iter_obj); + CNodePtr len_iter = block->func_graph()->NewCNode({op_len, iter_node}); + CNodePtr bool_node = block->func_graph()->NewCNode( + {NewValueNode(prim::kPrimScalarLt), len_iter, NewValueNode(prim::MAX_FOR_LOOP_COUNT)}); + + // create statement 'if len(xs) < prim::MAX_FOR_LOOP_COUNT then ParseForIter else ParseForLoop' + TraceManager::DebugTrace(std::make_shared(block->func_graph()->debug_info())); + FunctionBlockPtr true_block = MakeFunctionBlock(*this); + TraceManager::EndTrace(); + + TraceManager::DebugTrace(std::make_shared(block->func_graph()->debug_info())); + FunctionBlockPtr false_block = MakeFunctionBlock(*this); + TraceManager::EndTrace(); + + MakeConditionBlocks(block, true_block, false_block); + + TraceManager::DebugTrace(std::make_shared(block->func_graph()->debug_info())); + FunctionBlockPtr after_block = MakeFunctionBlock(*this); + TraceManager::EndTrace(); + + FunctionBlockPtr true_end = ParseForIter(true_block, node); + true_end->Jump(after_block, nullptr); + + FunctionBlockPtr false_end = ParseForLoop(false_block, node); + false_end->Jump(after_block, nullptr); + + block->ConditionalJump(bool_node, true_block, false_block); + after_block->Mature(); + return after_block; +} + +// A for loop will generate 3 functions :the test, the body, and the continuation +// for x in xs: +// body +// it is compiled to be following statement // it = iter(xs) // while hastnext(it) // x, it = next(it) // body -FunctionBlockPtr Parser::ParseFor(const FunctionBlockPtr &block, const py::object &node) { +FunctionBlockPtr Parser::ParseForIter(const FunctionBlockPtr &block, const py::object &node) { MS_LOG(DEBUG) << "Process ast For"; MS_EXCEPTION_IF_NULL(block); AnfNodePtr op_iter = 
block->MakeResolveOperation(NAMED_PRIMITIVE_ITER); @@ -1088,6 +1134,88 @@ FunctionBlockPtr Parser::ParseFor(const FunctionBlockPtr &block, const py::objec // No 'break', no end_block. return after_block; } + +// A for loop will generate 3 functions :the test, the body, and the continuation +// for x in xs: +// body +// it is compiled to be following statement +// i = 0 +// while i < len(xs) +// x = xs[i] +// i = i + 1 +// body +FunctionBlockPtr Parser::ParseForLoop(const FunctionBlockPtr &block, const py::object &node) { + MS_LOG(DEBUG) << "Process ast For by loop variable"; + MS_EXCEPTION_IF_NULL(block); + AnfNodePtr op_len = block->MakeResolveSymbol(NAMED_PRIMITIVE_LEN); + AnfNodePtr op_getitem = block->MakeResolveOperation(NAMED_PRIMITIVE_GETITEM); + + // get varibale name of 'x' in statement 'for x in xs' + py::object target_node = python_adapter::GetPyObjAttr(node, "target"); + + // create statement 'len(xs)' + py::object iter_obj = python_adapter::GetPyObjAttr(node, "iter"); + AnfNodePtr iter_node = ParseExprNode(block, iter_obj); + MS_EXCEPTION_IF_NULL(iter_node); + CNodePtr len_iter = block->func_graph()->NewCNode({op_len, iter_node}); + + FunctionBlockPtr header_block = + GenerateBlockInFor(std::make_shared(block->func_graph()->debug_info())); + MS_EXCEPTION_IF_NULL(header_block); + // create loop variable 'i' + ParameterPtr loop_var = header_block->func_graph()->add_parameter(); + // create loop condition 'i < len(xs)' + CNodePtr cond_node = header_block->func_graph()->NewCNode({NewValueNode(prim::kPrimScalarLt), loop_var, len_iter}); + + // generate the body of the for statement + FunctionBlockPtr body_block = GenerateBlockInFor(std::make_shared(block->func_graph()->debug_info())); + MS_EXCEPTION_IF_NULL(body_block); + body_block->AddPrevBlock(header_block); + // create 'x = xs[i]' + CNodePtr target_var = body_block->func_graph()->NewCNode({op_getitem, iter_node, loop_var}); + WriteAssignVars(body_block, target_node, target_var); + // create 'i = i + 1' 
+ CNodePtr loop_var_inc = + body_block->func_graph()->NewCNode({NewValueNode(prim::kPrimScalarAdd), loop_var, NewValueNode(1)}); + body_block->WriteVariable(loop_var->name(), loop_var_inc); + + // link the variable name with the target + auto it_info = std::make_shared(loop_var_inc->debug_info()); + loop_var->debug_info()->set_trace_info(it_info); + len_iter->debug_info()->set_trace_info(it_info); + + TraceManager::DebugTrace(std::make_shared(block->func_graph()->debug_info())); + FunctionBlockPtr after_block = MakeFunctionBlock(*this); + MS_EXCEPTION_IF_NULL(after_block); + TraceManager::EndTrace(); + after_block->AddPrevBlock(header_block); + + block->Jump(header_block, NewValueNode(0)); + body_block->Mature(); + + header_block->ConditionalJump(cond_node, body_block, after_block, false); + + // Parse loop body statements with loop context. + LoopContext loop_context{&loops_, header_block, loop_var_inc}; + py::object body_node = python_adapter::GetPyObjAttr(node, "body"); + FunctionBlockPtr after_body_block = ParseStatements(body_block, body_node); + if (after_body_block->func_graph()->get_return() == nullptr) { + after_body_block->Jump(header_block, loop_var_inc); + } + + header_block->Mature(); + after_block->Mature(); + auto &end_block = loop_context.EndBlock(); + if (end_block) { + // end_block exists if we encounter 'break' in loop body. + after_block->Jump(end_block, nullptr); + end_block->Mature(); + return end_block; + } + // No 'break', no end_block. 
+ return after_block; +} + AnfNodePtr Parser::ParseIfExp(const FunctionBlockPtr &block, const py::object &node) { MS_LOG(DEBUG) << "Process ast IfExp"; MS_EXCEPTION_IF_NULL(block); diff --git a/mindspore/ccsrc/pipeline/parse/parse.h b/mindspore/ccsrc/pipeline/jit/parse/parse.h similarity index 97% rename from mindspore/ccsrc/pipeline/parse/parse.h rename to mindspore/ccsrc/pipeline/jit/parse/parse.h index 19c503c6d0e..90e965389f4 100644 --- a/mindspore/ccsrc/pipeline/parse/parse.h +++ b/mindspore/ccsrc/pipeline/jit/parse/parse.h @@ -27,9 +27,9 @@ #include #include "utils/misc.h" #include "ir/anf.h" -#include "pipeline/parse/parse_base.h" -#include "pipeline/parse/python_adapter.h" -#include "pipeline/parse/function_block.h" +#include "pipeline/jit/parse/parse_base.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "pipeline/jit/parse/function_block.h" namespace mindspore { namespace parse { @@ -106,6 +106,8 @@ class Parser { FunctionBlockPtr ParseWhile(const FunctionBlockPtr &block, const py::object &node); // process a for statement FunctionBlockPtr ParseFor(const FunctionBlockPtr &block, const py::object &node); + FunctionBlockPtr ParseForIter(const FunctionBlockPtr &block, const py::object &node); + FunctionBlockPtr ParseForLoop(const FunctionBlockPtr &block, const py::object &node); // process a function def statement FunctionBlockPtr ParseFunctionDef(const FunctionBlockPtr &block, const py::object &node); // process a augment assign diff --git a/mindspore/ccsrc/pipeline/parse/parse_base.h b/mindspore/ccsrc/pipeline/jit/parse/parse_base.h similarity index 99% rename from mindspore/ccsrc/pipeline/parse/parse_base.h rename to mindspore/ccsrc/pipeline/jit/parse/parse_base.h index 4961ab78c0f..bdd79d00bd4 100644 --- a/mindspore/ccsrc/pipeline/parse/parse_base.h +++ b/mindspore/ccsrc/pipeline/jit/parse/parse_base.h @@ -87,6 +87,7 @@ const char PYTHON_PARSE_CLASS_ELLIPSIS[] = "create_ellipsis_obj"; const char PYTHON_MOD_GET_DEFAULT_INPUT[] = 
"get_default_input"; // define the common name +const char NAMED_PRIMITIVE_LEN[] = "len"; const char NAMED_PRIMITIVE_ITER[] = "iter"; const char NAMED_PRIMITIVE_NEXT[] = "next"; const char NAMED_PRIMITIVE_GETITEM[] = "getitem"; diff --git a/mindspore/ccsrc/pipeline/parse/python_adapter.cc b/mindspore/ccsrc/pipeline/jit/parse/python_adapter.cc similarity index 98% rename from mindspore/ccsrc/pipeline/parse/python_adapter.cc rename to mindspore/ccsrc/pipeline/jit/parse/python_adapter.cc index df2f7d0d45d..17be74b2a1f 100644 --- a/mindspore/ccsrc/pipeline/parse/python_adapter.cc +++ b/mindspore/ccsrc/pipeline/jit/parse/python_adapter.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pipeline/parse/python_adapter.h" +#include "pipeline/jit/parse/python_adapter.h" #include #include #include diff --git a/mindspore/ccsrc/pipeline/parse/python_adapter.h b/mindspore/ccsrc/pipeline/jit/parse/python_adapter.h similarity index 98% rename from mindspore/ccsrc/pipeline/parse/python_adapter.h rename to mindspore/ccsrc/pipeline/jit/parse/python_adapter.h index 98adcd4f731..0f49539bc85 100644 --- a/mindspore/ccsrc/pipeline/parse/python_adapter.h +++ b/mindspore/ccsrc/pipeline/jit/parse/python_adapter.h @@ -24,7 +24,7 @@ #include "pybind11/pybind11.h" #include "pybind11/stl.h" -#include "pipeline/parse/parse_base.h" +#include "pipeline/jit/parse/parse_base.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/pipeline/parse/resolve.cc b/mindspore/ccsrc/pipeline/jit/parse/resolve.cc similarity index 87% rename from mindspore/ccsrc/pipeline/parse/resolve.cc rename to mindspore/ccsrc/pipeline/jit/parse/resolve.cc index 87c2f78b425..8d4c4026391 100644 --- a/mindspore/ccsrc/pipeline/parse/resolve.cc +++ b/mindspore/ccsrc/pipeline/jit/parse/resolve.cc @@ -14,21 +14,21 @@ * limitations under the License. 
*/ -#include "pipeline/parse/resolve.h" +#include "pipeline/jit/parse/resolve.h" #include #include #include #include -#include "ir/param_value_py.h" -#include "pipeline/parse/data_converter.h" -#include "pipeline/parse/parse.h" -#include "pipeline/parse/python_adapter.h" +#include "ir/param_value.h" +#include "pipeline/jit/parse/data_converter.h" +#include "pipeline/jit/parse/parse.h" +#include "pipeline/jit/parse/python_adapter.h" #include "utils/any.h" -#include "operator/ops.h" -#include "optimizer/opt.h" -#include "optimizer/irpass.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/opt.h" +#include "frontend/optimizer/irpass.h" #include "./common.h" namespace mindspore { @@ -103,16 +103,12 @@ AnfNodePtr ResolveParameterObj(const FuncGraphPtr &func_graph, const py::object } if (para_node == nullptr) { auto node = top_graph->AddWeightParameter(param_name); - auto param_value_new = std::make_shared(obj); - node->set_default_param(param_value_new); - + auto param_value = py::cast(python_adapter::GetPyObjAttr(obj, "_value")); + node->set_default_param(param_value); // set_abstract for parameter - auto to_convert = py::cast(python_adapter::GetPyObjAttr(obj, "default_input")); - ValuePtr converted = nullptr; - (void)ConvertData(to_convert, &converted); - bool broaden = true; - node->set_abstract(abstract::FromValue(converted, broaden)); - + ValuePtr value = param_value->value(); + constexpr bool broaden = true; + node->set_abstract(abstract::FromValue(value, broaden)); para_node = node; } auto iter = func_graph->make_ref_params().find(para_node); @@ -172,15 +168,15 @@ bool ResolveObjectToNode(const FuncGraphPtr &func_graph, const py::object &obj, return true; } -bool IsAllGraphInValueSequence(const std::vector &value_vec) { +bool IsAllFuncInValueSequence(const std::vector &value_vec) { for (auto &elem : value_vec) { if (elem->isa() || elem->isa()) { const auto &vec = GetValue>(elem); - auto is_graph = IsAllGraphInValueSequence(vec); + auto is_graph = 
IsAllFuncInValueSequence(vec); if (!is_graph) { return false; } - } else if (!elem->isa()) { + } else if (!elem->isa() && !elem->isa()) { return false; } } @@ -200,6 +196,8 @@ AnfNodePtr TransformToMakeTupleNodes(const FuncGraphManagerPtr &manager, const F FuncGraphPtr new_fg = elem->cast(); manager->AddFuncGraph(new_fg); node = NewValueNode(new_fg); + } else if (elem->isa()) { + node = NewValueNode(elem); } else { MS_LOG(EXCEPTION) << "TransformToMakeTupleNodes error, expect funcgraph, got " << elem->ToString(); } @@ -209,19 +207,21 @@ AnfNodePtr TransformToMakeTupleNodes(const FuncGraphManagerPtr &manager, const F return cnode; } -// transform the ValueTuple or ValueList of graph node to make tuple of const graph node -bool TransformVectorGraphValueNode(const FuncGraphManagerPtr &manager, const FuncGraphPtr &func_graph, - const ValueNodePtr &value_node, AnfNodePtr *const transformed) { +// transform the ValueTuple or ValueList of graph/primitve node to make tuple of const graph/primitve node +bool TransformVectorFuncValueNode(const FuncGraphManagerPtr &manager, const FuncGraphPtr &func_graph, + const ValueNodePtr &value_node, AnfNodePtr *const transformed) { MS_EXCEPTION_IF_NULL(value_node); const auto &value_vec = GetValue>(value_node->value()); - if (!IsAllGraphInValueSequence(value_vec)) { + if (!IsAllFuncInValueSequence(value_vec)) { return false; } - // The celllist or ordered_cell will be parsed as valuetuple of const graph in it, + // (1) The celllist or ordered_cell will be parsed as valuetuple of const graph in it, // So if has graph in list, try to replace the node with make tuple of graph value node. // we do this because the graphmanger won't investigate the graph inside valuetuple, - // change the vector of graph to be make_tuple of graph value node + // change the vector of graph to be make_tuple of graph value node. + // (2) the primitve valuetuple or valuelist may encounter to abstract error, make it all + // independent nodes. 
auto node_tuple_graphs = TransformToMakeTupleNodes(manager, func_graph, value_vec); // replace the ret ptr to be make tuple of graph value node *transformed = node_tuple_graphs; @@ -255,8 +255,8 @@ AnfNodePtr ResolveSymbol(const FuncGraphManagerPtr &manager, const NameSpacePtr // if the constant node is constant of vector of graph ,add graph to manager if (IsValueNode(resolved_node) || IsValueNode(resolved_node)) { - (void)TransformVectorGraphValueNode(manager, node->func_graph(), resolved_node->cast(), - &resolved_node); + (void)TransformVectorFuncValueNode(manager, node->func_graph(), resolved_node->cast(), + &resolved_node); } TraceManager::EndTrace(); diff --git a/mindspore/ccsrc/pipeline/parse/resolve.h b/mindspore/ccsrc/pipeline/jit/parse/resolve.h similarity index 97% rename from mindspore/ccsrc/pipeline/parse/resolve.h rename to mindspore/ccsrc/pipeline/jit/parse/resolve.h index df5c54855f0..d924f1ef446 100644 --- a/mindspore/ccsrc/pipeline/parse/resolve.h +++ b/mindspore/ccsrc/pipeline/jit/parse/resolve.h @@ -21,9 +21,9 @@ #include #include "ir/anf.h" #include "ir/manager.h" -#include "pipeline/parse/python_adapter.h" -#include "pipeline/parse/parse_base.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "pipeline/jit/parse/parse_base.h" +#include "abstract/abstract_value.h" #include "utils/log_adapter.h" // forward declaration of ResourceBase diff --git a/mindspore/ccsrc/pipeline/pass.cc b/mindspore/ccsrc/pipeline/jit/pass.cc similarity index 91% rename from mindspore/ccsrc/pipeline/pass.cc rename to mindspore/ccsrc/pipeline/jit/pass.cc index f6cfd6362c4..bb9a517556e 100644 --- a/mindspore/ccsrc/pipeline/pass.cc +++ b/mindspore/ccsrc/pipeline/jit/pass.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "pipeline/pass.h" +#include "pipeline/jit/pass.h" #include #include @@ -26,19 +26,19 @@ #include "ir/func_graph_cloner.h" #include "debug/anf_ir_utils.h" -#include "pipeline/parse/parse_base.h" -#include "pipeline/parse/data_converter.h" -#include "pipeline/resource.h" -#include "pipeline/validator.h" -#include "optimizer/optimizer.h" -#include "optimizer/cse.h" -#include "optimizer/graph_kernel_reuse.h" -#include "optimizer/clean.h" -#include "optimizer/irpass.h" -#include "optimizer/control_depend.h" -#include "parallel/step_parallel.h" -#include "parallel/step_auto_parallel.h" -#include "parallel/allreduce_fusion/step_allreduce_fusion.h" +#include "pipeline/jit/parse/parse_base.h" +#include "pipeline/jit/parse/data_converter.h" +#include "pipeline/jit/resource.h" +#include "pipeline/jit/validator.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/optimizer/cse.h" +#include "frontend/optimizer/graph_kernel_reuse.h" +#include "frontend/optimizer/clean.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/control_depend.h" +#include "frontend/parallel/step_parallel.h" +#include "frontend/parallel/step_auto_parallel.h" +#include "frontend/parallel/allreduce_fusion/step_allreduce_fusion.h" #include "utils/any.h" #include "utils/log_adapter.h" @@ -321,21 +321,19 @@ bool InferenceOptPreparePass(const ResourcePtr &res) { return true; } -std::vector kVmPasses = {{"simplify_data_structures", SimplifyDataStructuresPass}, - {"opt_a", OptPassAGroup}, +std::vector kVmPasses = {{"opt_a", OptPassAGroup}, + {"simplify_data_structures", SimplifyDataStructuresPass}, {"opt_b", OptPassBGroup}, {"cconv", CconvPass}, {"opt_graph_kernel_a", OptPassGraphKernelGroupA}, {"opt_graph_kernel_b", OptPassGraphKernelGroupB}, {"add_control_depend", AddControlDependPass}}; -std::vector kGePasses = {{"simplify_data_structures", SimplifyDataStructuresPass}, - {"opt_a", OptPassAGroup}, - {"opt_b", OptPassBGroup}, - {"add_control_depend", 
AddControlDependPass}, - {"opt_control", ControlGroup}, - {"opt_prepare", PrepareGroup}, - {"cconv", CconvPass}}; +std::vector kGePasses = { + {"opt_a", OptPassAGroup}, {"simplify_data_structures", SimplifyDataStructuresPass}, + {"opt_b", OptPassBGroup}, {"add_control_depend", AddControlDependPass}, + {"opt_control", ControlGroup}, {"opt_prepare", PrepareGroup}, + {"cconv", CconvPass}}; std::vector kPynativePasses = {{"opt_a", OptPassAGroup}, {"opt_b", OptPassBGroup}, {"cconv", CconvPass}}; } // namespace pipeline diff --git a/mindspore/ccsrc/pipeline/pass.h b/mindspore/ccsrc/pipeline/jit/pass.h similarity index 97% rename from mindspore/ccsrc/pipeline/pass.h rename to mindspore/ccsrc/pipeline/jit/pass.h index 9064df52ee4..0233b6cf264 100644 --- a/mindspore/ccsrc/pipeline/pass.h +++ b/mindspore/ccsrc/pipeline/jit/pass.h @@ -21,7 +21,7 @@ #include #include #include -#include "pipeline/resource.h" +#include "pipeline/jit/resource.h" namespace mindspore { namespace pipeline { diff --git a/mindspore/ccsrc/pipeline/pipeline.cc b/mindspore/ccsrc/pipeline/jit/pipeline.cc similarity index 93% rename from mindspore/ccsrc/pipeline/pipeline.cc rename to mindspore/ccsrc/pipeline/jit/pipeline.cc index 6abe198f5a8..05699793ff8 100644 --- a/mindspore/ccsrc/pipeline/pipeline.cc +++ b/mindspore/ccsrc/pipeline/jit/pipeline.cc @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -#include "pipeline/pipeline.h" +#include "pipeline/jit/pipeline.h" #include #include @@ -24,27 +24,27 @@ #include #include -#include "ir/param_value_py.h" -#include "pipeline/pass.h" -#include "pipeline/parse/data_converter.h" -#include "optimizer/ad/dfunctor.h" +#include "ir/param_value.h" +#include "pipeline/jit/pass.h" +#include "pipeline/jit/parse/data_converter.h" +#include "frontend/optimizer/ad/dfunctor.h" #include "debug/anf_ir_dump.h" #include "debug/anf_ir_utils.h" #include "utils/config_manager.h" #include "utils/convert_utils.h" #include "utils/utils.h" #include "vm/segment_runner.h" -#include "parallel/context.h" -#include "parallel/graph_util/get_parallel_info.h" -#include "device/kernel_runtime_manager.h" +#include "frontend/parallel/context.h" +#include "frontend/parallel/graph_util/get_parallel_info.h" +#include "runtime/device/kernel_runtime_manager.h" #include "debug/trace.h" -#include "pynative/pynative_execute.h" -#include "optimizer/py_pass_manager.h" +#include "pipeline/pynative/pynative_execute.h" +#include "frontend/optimizer/py_pass_manager.h" #if (ENABLE_GE || ENABLE_D) -#include "pipeline/pipeline_ge.h" -#include "transform/convert.h" -#include "transform/df_graph_manager.h" +#include "pipeline/jit/pipeline_ge.h" +#include "transform/graph_ir/convert.h" +#include "transform/graph_ir/df_graph_manager.h" #endif namespace mindspore { @@ -289,7 +289,8 @@ std::map> ExecutorPy::FetchI MS_LOG(DEBUG) << "FetchInfoForQuantExport func graph(" << func_graph->ToString() << ") phase(" << phase_s << ")!"; std::map> fake_quant_table; auto filter = [](AnfNodePtr node) { - return !(IsPrimitiveCNode(node, prim::kPrimConv2D) || IsPrimitiveCNode(node, prim::kPrimMatMul)); + return !(IsPrimitiveCNode(node, prim::kPrimConv2D) || IsPrimitiveCNode(node, prim::kPrimMatMul) || + IsPrimitiveCNode(node, prim::kPrimDepthwiseConv2dNative)); }; std::vector nodes = DeepScopedGraphSearchWithFilter(func_graph->get_return(), AlwaysInclude, filter); auto is_quant_cnode 
= [](AnfNodePtr node) { @@ -327,6 +328,9 @@ std::map> ExecutorPy::FetchI x = cnode->input(1); count += 1; } + if (x->isa()) { + fake_quant_table[weight_name] = std::make_pair(nullptr, "input"); + } // get the fakequant parameter minq's name if (!is_quant_cnode(x)) { continue; @@ -374,34 +378,6 @@ void ExecutorPy::SaveCompiledGraph(const std::string &phase_s) { MS_LOG(INFO) << "End save compiled func graph!"; } -void ExecutorPy::SaveCompiledGraphToPb(const std::string &phase_s) { -#ifdef ENABLE_DUMP_IR - // save the graph to file in protobuf format - FuncGraphPtr func_graph = info_[phase_s]->resource->func_graph(); - MS_EXCEPTION_IF_NULL(func_graph); - if (phase_s.empty()) { - MS_LOG(ERROR) << "`phase` is empty '" << phase_s << "'!"; - return; - } - std::string name_prefix = phase_s.substr(0, phase_s.find(".")); - std::string pb_filename = std::string("ms_output_") + name_prefix + ".pb"; - std::string filename = GetFilePathName(pb_filename); - - MS_LOG(INFO) << "Begin saving graph to file <<'" << filename << "' in protobuf formart."; - ChangeFileMode(filename, S_IRWXU); - std::ofstream ofs(filename); - if (!ofs.is_open()) { - MS_LOG(ERROR) << "Open file '" << filename << "' failed!"; - return; - } - ofs << GetFuncGraphProtoString(func_graph); - ofs.close(); - // set file mode to read only by user - ChangeFileMode(filename, S_IRUSR); - MS_LOG(INFO) << "End saving graph to file in protobuf format"; -#endif -} - bool ExecutorPy::ChangeExportGeirUseVmFlag(bool use_vm, const std::string &phase_s) const { std::string phase_prefix = GetPhasePrefix(phase_s); @@ -476,8 +452,6 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons info_[phase_s] = executor_info; pip->Run(); - // save compile graph to file in protobuf format - SaveCompiledGraphToPb(phase_s); // save the run graph func to MsPipeLine SaveCompiledGraph(phase_s); @@ -650,11 +624,8 @@ void Pipeline::Run() { draw::Draw(base_name + ".dot", graph); // generate IR file in human readable 
format DumpIR(base_name + ".ir", graph); - // generate IR file in a heavily commented format, which can also be reloaded - if (action.first != "parse") { - ExportIR(base_name + ".dat", std::to_string(i), graph); - } + ExportIR(base_name + ".dat", std::to_string(i), graph); } #ifdef MS_DEBUG // Dump graph cnode list @@ -727,10 +698,7 @@ void ProcessVmArgInner(const py::tuple &args, const ResourcePtr &res, VectorRef if (!param_ptr->has_default()) { MS_LOG(EXCEPTION) << "Parameter[" << i << "] has no default param"; } - auto param_value = std::dynamic_pointer_cast(param_ptr->default_param()); - py::object obj = param_value->value(); - py::object p_value = py::cast(parse::python_adapter::GetPyObjAttr(obj, "default_input")); - (*arg_list).push_back(p_value); + arg_list->push_back(param_ptr->default_param()->value()); } } } diff --git a/mindspore/ccsrc/pipeline/pipeline.h b/mindspore/ccsrc/pipeline/jit/pipeline.h similarity index 97% rename from mindspore/ccsrc/pipeline/pipeline.h rename to mindspore/ccsrc/pipeline/jit/pipeline.h index 3f1274c417d..705853d0860 100644 --- a/mindspore/ccsrc/pipeline/pipeline.h +++ b/mindspore/ccsrc/pipeline/jit/pipeline.h @@ -29,10 +29,10 @@ #include "debug/draw.h" #include "ir/anf.h" #include "ir/tensor.h" -#include "pipeline/action.h" +#include "pipeline/jit/action.h" #include "vm/segment_runner.h" #include "vm/transform.h" -#include "pipeline/base.h" +#include "pipeline/jit/base.h" namespace mindspore { extern const char kMsConvert[]; @@ -72,7 +72,6 @@ class ExecutorPy : public std::enable_shared_from_this { ~ExecutorPy(); void SaveCompiledGraph(const std::string &phase_s); - void SaveCompiledGraphToPb(const std::string &phase_s); bool CompileInner(const py::object &obj, const py::tuple &args, const py::object &phase, bool use_vm); bool Compile(const py::object &obj, const py::tuple &args, const py::object &phase, bool use_vm); diff --git a/mindspore/ccsrc/pipeline/pipeline_ge.cc b/mindspore/ccsrc/pipeline/jit/pipeline_ge.cc similarity 
index 98% rename from mindspore/ccsrc/pipeline/pipeline_ge.cc rename to mindspore/ccsrc/pipeline/jit/pipeline_ge.cc index 8ec16023157..e08af4f2dc8 100644 --- a/mindspore/ccsrc/pipeline/pipeline_ge.cc +++ b/mindspore/ccsrc/pipeline/jit/pipeline_ge.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pipeline/pipeline_ge.h" +#include "pipeline/jit/pipeline_ge.h" #include #include @@ -24,12 +24,12 @@ #include "debug/anf_ir_dump.h" #include "ir/tensor.h" -#include "transform/convert.h" -#include "transform/df_graph_manager.h" -#include "transform/graph_builder.h" -#include "transform/graph_runner.h" +#include "transform/graph_ir/convert.h" +#include "transform/graph_ir/df_graph_manager.h" +#include "transform/graph_ir/graph_builder.h" +#include "transform/graph_ir/graph_runner.h" #include "debug/draw.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" namespace mindspore { namespace pipeline { diff --git a/mindspore/ccsrc/pipeline/pipeline_ge.h b/mindspore/ccsrc/pipeline/jit/pipeline_ge.h similarity index 96% rename from mindspore/ccsrc/pipeline/pipeline_ge.h rename to mindspore/ccsrc/pipeline/jit/pipeline_ge.h index f3a363dbe89..f8341252311 100644 --- a/mindspore/ccsrc/pipeline/pipeline_ge.h +++ b/mindspore/ccsrc/pipeline/jit/pipeline_ge.h @@ -26,8 +26,8 @@ #include #include "pybind11/pybind11.h" -#include "pipeline/base.h" -#include "operator/ops.h" +#include "pipeline/jit/base.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace pipeline { diff --git a/mindspore/ccsrc/pipeline/remove_value_node_dup.cc b/mindspore/ccsrc/pipeline/jit/remove_value_node_dup.cc similarity index 96% rename from mindspore/ccsrc/pipeline/remove_value_node_dup.cc rename to mindspore/ccsrc/pipeline/jit/remove_value_node_dup.cc index 47881e4b915..e9467e4aeb9 100644 --- a/mindspore/ccsrc/pipeline/remove_value_node_dup.cc +++ b/mindspore/ccsrc/pipeline/jit/remove_value_node_dup.cc @@ -14,11 +14,11 @@ * limitations under 
the License. */ -#include "pipeline/remove_value_node_dup.h" +#include "pipeline/jit/remove_value_node_dup.h" #include "ir/anf.h" #include "ir/tensor.h" #include "ir/manager.h" -#include "optimizer/cse.h" +#include "frontend/optimizer/cse.h" #include "utils/log_adapter.h" #include "utils/hashing.h" diff --git a/mindspore/ccsrc/pipeline/remove_value_node_dup.h b/mindspore/ccsrc/pipeline/jit/remove_value_node_dup.h similarity index 98% rename from mindspore/ccsrc/pipeline/remove_value_node_dup.h rename to mindspore/ccsrc/pipeline/jit/remove_value_node_dup.h index 8f670c7dcfd..b36544bdbad 100644 --- a/mindspore/ccsrc/pipeline/remove_value_node_dup.h +++ b/mindspore/ccsrc/pipeline/jit/remove_value_node_dup.h @@ -19,7 +19,7 @@ #include #include -#include "ir/base.h" +#include "base/base.h" #include "ir/manager.h" namespace mindspore { diff --git a/mindspore/ccsrc/pipeline/resource.cc b/mindspore/ccsrc/pipeline/jit/resource.cc similarity index 82% rename from mindspore/ccsrc/pipeline/resource.cc rename to mindspore/ccsrc/pipeline/jit/resource.cc index faf1f2015d4..ece128b77b7 100644 --- a/mindspore/ccsrc/pipeline/resource.cc +++ b/mindspore/ccsrc/pipeline/jit/resource.cc @@ -16,16 +16,16 @@ * limitations under the License. 
*/ -#include "pipeline/resource.h" -#include "pipeline/pipeline.h" -#include "pipeline/static_analysis/static_analysis.h" +#include "pipeline/jit/resource.h" +#include "pipeline/jit/pipeline.h" +#include "pipeline/jit/static_analysis/static_analysis.h" #include "debug/draw.h" #include "debug/trace.h" #include "ir/dtype.h" -#include "pipeline/parse/data_converter.h" -#include "operator/ops.h" +#include "pipeline/jit/parse/data_converter.h" +#include "frontend/operator/ops.h" #include "utils/graph_utils.h" -#include "optimizer/ad/dfunctor.h" +#include "frontend/optimizer/ad/dfunctor.h" #include "vm/segment_runner.h" namespace mindspore { @@ -146,37 +146,35 @@ MethodMap &GetMethodMap() { }}, {kObjectTypeTensorType, { - {"__add__", std::string("add")}, // C.add - {"__sub__", std::string("sub")}, // C.sub - {"__mul__", std::string("mul")}, // C.mul - {"__truediv__", std::string("truediv")}, // C.truediv - {"__floordiv__", std::string("floordiv")}, // C.floordiv - {"__mod__", std::string("mod")}, // C.mod - {"__pow__", std::string("pow_")}, // C.pow - {"__floor__", std::string("array_floor")}, // C.array_floor - {"__trunc__", std::string("array_trunc")}, // C.array_trunc - {"__pos__", std::string("array_uadd")}, // C.array_uadd - {"__neg__", std::string("array_usub")}, // C.array_usub - {"__eq__", std::string("eq")}, // C.eq - {"__ne__", std::string("ne")}, // C.ne - {"__lt__", std::string("lt")}, // C.lt - {"__gt__", std::string("gt")}, // C.gt - {"__le__", std::string("le")}, // C.le - {"__ge__", std::string("ge")}, // C.ge - {"__matmul__", prim::kPrimDot}, // P.dot, - {"__len__", prim::kPrimArrayLen}, // P.array_len, - {"__getitem__", prim::kPrimArrayGetItem}, // P.array_getitem, - {"__setitem__", prim::kPrimArraySetItem}, // P.array_setitem, - {"__ms_iter__", std::string("array_iter")}, // C.array_iter - {"__ms_to_array__", prim::kPrimIdentity}, // P.identity, - {"item", prim::kPrimArrayToScalar}, // P.array_to_scalar, - {"transpose", std::string("transpose")}, // 
P.transpose - {"__bool__", std::string("tensor_bool")}, // C.tensor_bool - {"is_indexed_slices", prim::kPrimIsIndexedSlices}, // F.is_indexed_slices + {"__add__", std::string("add")}, // C.add + {"__sub__", std::string("sub")}, // C.sub + {"__mul__", std::string("mul")}, // C.mul + {"__truediv__", std::string("truediv")}, // C.truediv + {"__floordiv__", std::string("floordiv")}, // C.floordiv + {"__mod__", std::string("mod")}, // C.mod + {"__pow__", std::string("pow_")}, // C.pow + {"__floor__", std::string("array_floor")}, // C.array_floor + {"__trunc__", std::string("array_trunc")}, // C.array_trunc + {"__pos__", std::string("array_uadd")}, // C.array_uadd + {"__neg__", std::string("array_usub")}, // C.array_usub + {"__eq__", std::string("eq")}, // C.eq + {"__ne__", std::string("ne")}, // C.ne + {"__lt__", std::string("lt")}, // C.lt + {"__gt__", std::string("gt")}, // C.gt + {"__le__", std::string("le")}, // C.le + {"__ge__", std::string("ge")}, // C.ge + {"__matmul__", prim::kPrimDot}, // P.dot, + {"__len__", prim::kPrimArrayLen}, // P.array_len, + {"__getitem__", prim::kPrimArrayGetItem}, // P.array_getitem, + {"__setitem__", prim::kPrimArraySetItem}, // P.array_setitem, + {"__ms_iter__", std::string("array_iter")}, // C.array_iter + {"__ms_to_array__", prim::kPrimIdentity}, // P.identity, + {"item", prim::kPrimArrayToScalar}, // P.array_to_scalar, + {"transpose", std::string("transpose")}, // P.transpose + {"__bool__", std::string("tensor_bool")}, // C.tensor_bool }}, {kObjectTypeIndexedSlicesType, { - {"is_indexed_slices", prim::kPrimIsIndexedSlices}, // F.is_indexed_slices {"values", prim::kPrimIndexedSlicesGetValues}, // F.indexed_slices_get_values {"indices", prim::kPrimIndexedSlicesGetIndices}, // F.indexed_slices_get_indices {"dense_shape", prim::kPrimIndexedSlicesGetDenseShape}, // F.indexed_slices_get_dense_shape diff --git a/mindspore/ccsrc/pipeline/resource.h b/mindspore/ccsrc/pipeline/jit/resource.h similarity index 96% rename from 
mindspore/ccsrc/pipeline/resource.h rename to mindspore/ccsrc/pipeline/jit/resource.h index 0c1348fd943..819fdd3d203 100644 --- a/mindspore/ccsrc/pipeline/resource.h +++ b/mindspore/ccsrc/pipeline/jit/resource.h @@ -29,8 +29,8 @@ #include "utils/any.h" #include "utils/profile.h" #include "ir/manager.h" -#include "pipeline/static_analysis/prim.h" -#include "pipeline/static_analysis/static_analysis.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "pipeline/jit/static_analysis/static_analysis.h" #include "./common.h" namespace mindspore { diff --git a/mindspore/ccsrc/pipeline/static_analysis/abstract_function.cc b/mindspore/ccsrc/pipeline/jit/static_analysis/abstract_function.cc similarity index 98% rename from mindspore/ccsrc/pipeline/static_analysis/abstract_function.cc rename to mindspore/ccsrc/pipeline/jit/static_analysis/abstract_function.cc index ced4a518cb8..8bdb2a0c6c3 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/abstract_function.cc +++ b/mindspore/ccsrc/pipeline/jit/static_analysis/abstract_function.cc @@ -14,12 +14,11 @@ * limitations under the License. 
*/ -#include "pipeline/static_analysis/abstract_function.h" +#include "pipeline/jit/static_analysis/abstract_function.h" #include -#include "pipeline/static_analysis/analysis_context.h" -#include "pipeline/static_analysis/static_analysis.h" +#include "pipeline/jit/static_analysis/static_analysis.h" namespace mindspore { namespace abstract { diff --git a/mindspore/ccsrc/pipeline/static_analysis/abstract_function.h b/mindspore/ccsrc/pipeline/jit/static_analysis/abstract_function.h similarity index 99% rename from mindspore/ccsrc/pipeline/static_analysis/abstract_function.h rename to mindspore/ccsrc/pipeline/jit/static_analysis/abstract_function.h index 9e1cf9ba83d..0823b21cd76 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/abstract_function.h +++ b/mindspore/ccsrc/pipeline/jit/static_analysis/abstract_function.h @@ -22,8 +22,8 @@ #include #include -#include "pipeline/static_analysis/abstract_value.h" -#include "pipeline/static_analysis/analysis_context.h" +#include "abstract/abstract_value.h" +#include "abstract/analysis_context.h" #include "ir/meta_func_graph.h" namespace mindspore { diff --git a/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc b/mindspore/ccsrc/pipeline/jit/static_analysis/evaluator.cc similarity index 98% rename from mindspore/ccsrc/pipeline/static_analysis/evaluator.cc rename to mindspore/ccsrc/pipeline/jit/static_analysis/evaluator.cc index 34ecfc89808..3e820eed3a6 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc +++ b/mindspore/ccsrc/pipeline/jit/static_analysis/evaluator.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include "pipeline/static_analysis/evaluator.h" +#include "pipeline/jit/static_analysis/evaluator.h" #include #include #include "ir/func_graph_cloner.h" -#include "pipeline/static_analysis/utils.h" +#include "abstract/utils.h" #include "debug/trace.h" namespace mindspore { @@ -126,7 +126,11 @@ EvalResultPtr BaseFuncGraphEvaluator::Eval(AnalysisEnginePtr engine, const Abstr } MS_EXCEPTION_IF_NULL(ret_base); - MS_LOG(DEBUG) << "BaseFuncGraph " << fg->ToString() << " eval end, evaluated abstract: " << ret_base->ToString(); + MS_LOG(DEBUG) << "BaseFuncGraph " << fg->ToString() << " eval end, evaluated abstract: " << ret_base->ToString() + << ", is stub: " << fg->stub(); + if (fg->stub()) { + return std::make_shared(std::make_shared(), nullptr); + } return std::make_shared(ret_base, nullptr); } diff --git a/mindspore/ccsrc/pipeline/static_analysis/evaluator.h b/mindspore/ccsrc/pipeline/jit/static_analysis/evaluator.h similarity index 97% rename from mindspore/ccsrc/pipeline/static_analysis/evaluator.h rename to mindspore/ccsrc/pipeline/jit/static_analysis/evaluator.h index f6430eda84c..461574257de 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/evaluator.h +++ b/mindspore/ccsrc/pipeline/jit/static_analysis/evaluator.h @@ -24,7 +24,8 @@ #include #include -#include "pipeline/static_analysis/static_analysis.h" +#include "pipeline/jit/static_analysis/static_analysis.h" +#include "utils/context/ms_context.h" namespace mindspore { namespace abstract { @@ -59,6 +60,13 @@ class Evaluator : public Base { } virtual EvalResultPtr AbstractEval(const AbstractBasePtrList &args_spec_list) { + auto context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context); + bool enable_sparse = context->enable_sparse(); + if (!enable_sparse) { + return nullptr; + } + auto is_abstract = std::any_of(args_spec_list.begin(), args_spec_list.end(), [](auto &arg) { if (arg->BuildType()->type_id() == kObjectTypeUndeterminedType) { return true; diff --git 
a/mindspore/ccsrc/pipeline/static_analysis/prim.cc b/mindspore/ccsrc/pipeline/jit/static_analysis/prim.cc similarity index 96% rename from mindspore/ccsrc/pipeline/static_analysis/prim.cc rename to mindspore/ccsrc/pipeline/jit/static_analysis/prim.cc index 99dc0859893..99e613395cc 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/prim.cc +++ b/mindspore/ccsrc/pipeline/jit/static_analysis/prim.cc @@ -16,7 +16,7 @@ * limitations under the License. */ -#include "pipeline/static_analysis/prim.h" +#include "pipeline/jit/static_analysis/prim.h" #include #include @@ -25,20 +25,20 @@ #include #include -#include "operator/cc_implementations.h" -#include "operator/ops.h" -#include "operator/composite/do_signature.h" -#include "operator/prim_to_function.h" -#include "pipeline/static_analysis/utils.h" +#include "frontend/operator/cc_implementations.h" +#include "frontend/operator/ops.h" +#include "frontend/operator/composite/do_signature.h" +#include "frontend/operator/prim_to_function.h" +#include "abstract/utils.h" #include "utils/symbolic.h" #include "./common.h" -#include "pipeline/resource.h" -#include "pipeline/parse/resolve.h" +#include "pipeline/jit/resource.h" +#include "pipeline/jit/parse/resolve.h" #include "ir/tensor.h" #include "utils/convert_utils.h" #include "utils/context/ms_context.h" -#include "pipeline/parse/data_converter.h" -#include "pipeline/static_analysis/param_validator.h" +#include "pipeline/jit/parse/data_converter.h" +#include "abstract/param_validator.h" #include "common/utils.h" namespace mindspore { @@ -146,10 +146,7 @@ PrimitiveEvalImplMap &GetPrimitiveToEvalImplMap() { using mindspore::parse::PyObjectWrapper; EvalResultPtr StandardPrimEvaluator::EvalPrim(const AnalysisEnginePtr &engine, const AbstractBasePtrList &args) { - auto context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context); - bool enable_sparse_flag = context->enable_sparse_flag(); - if (enable_sparse_flag && prim_ != prim::kPrimMakeTuple && prim_ != 
prim::kPrimSwitch) { + if (prim_ != prim::kPrimMakeTuple && prim_ != prim::kPrimSwitch) { auto ret_abstract = AbstractEval(args); if (ret_abstract != nullptr) { MS_LOG(DEBUG) << "StandardPrimEvaluator eval Undetermined"; @@ -167,6 +164,14 @@ EvalResultPtr StandardPrimEvaluator::EvalPrim(const AnalysisEnginePtr &engine, c EvalResultPtr DoSignatureEvaluator::Run(AnalysisEnginePtr engine, const ConfigPtrList &args_conf_list, AnfNodeConfigPtr out_conf) { AbstractBasePtrList args_spec_list; + (void)std::transform(args_conf_list.begin(), args_conf_list.end(), std::back_inserter(args_spec_list), + [](const ConfigPtr &ref) -> AbstractBasePtr { return ref->GetEvaluatedValue()->abstract(); }); + auto ret_abstract = AbstractEval(args_spec_list); + if (ret_abstract != nullptr) { + MS_LOG(DEBUG) << "StandardPrimEvaluator eval Undetermined"; + return ret_abstract; + } + if (out_conf->node() == nullptr || !out_conf->node()->isa()) { MS_LOG(EXCEPTION) << "Node of out_conf should be CNode"; } @@ -181,9 +186,6 @@ EvalResultPtr DoSignatureEvaluator::Run(AnalysisEnginePtr engine, const ConfigPt } AnfNodePtrList args_inputs{out_node_inputs.begin() + 1, out_node_inputs.end()}; - (void)std::transform(args_conf_list.begin(), args_conf_list.end(), std::back_inserter(args_spec_list), - [](const ConfigPtr &ref) -> AbstractBasePtr { return ref->GetEvaluatedValue()->abstract(); }); - ScopePtr scope = kDefaultScope; if (out_conf != nullptr) { scope = out_conf->node()->scope(); @@ -321,6 +323,13 @@ AnfNodePtr MixedPrecisionCastHelper(AnfNodePtr source_node, AbstractBasePtr node } target_node = func_graph->NewCNode({NewValueNode(prim::kPrimMakeDict), func_graph->NewCNode(dict_key_nodes), func_graph->NewCNode(dict_value_nodes)}); + } else if (node_type->isa()) { + auto x = node_type->cast(); + std::string kwarg_key = x->get_key(); + AnfNodePtr kwarg_value_node = + func_graph->NewCNode({NewValueNode(prim::kPrimExtractKeywordArg), NewValueNode(kwarg_key), source_node}); + AnfNodePtr node = 
MixedPrecisionCastHelper(kwarg_value_node, x->get_arg(), target_type, func_graph); + target_node = func_graph->NewCNode({NewValueNode(prim::kPrimMakeKeywordArg), NewValueNode(kwarg_key), node}); } return target_node; } @@ -502,15 +511,10 @@ AbstractBasePtr PyInferRes2Abstract(const PrimitivePyPtr &prim_py, const py::dic } // end anonymous namespace EvalResultPtr PythonPrimEvaluator::EvalPrim(const AnalysisEnginePtr &, const AbstractBasePtrList &args) { - auto context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context); - bool enable_sparse_flag = context->enable_sparse_flag(); - if (enable_sparse_flag) { - auto ret_abstract = AbstractEval(args); - if (ret_abstract != nullptr) { - MS_LOG(DEBUG) << "PythonPrimEvaluator eval Undetermined"; - return ret_abstract; - } + auto ret_abstract = AbstractEval(args); + if (ret_abstract != nullptr) { + MS_LOG(DEBUG) << "PythonPrimEvaluator eval Undetermined"; + return ret_abstract; } MS_LOG(DEBUG) << "Eval for:" << prim_py_->ToString(); @@ -539,15 +543,10 @@ EvalResultPtr PythonPrimEvaluator::EvalPrim(const AnalysisEnginePtr &, const Abs } EvalResultPtr UniformPrimEvaluator::EvalPrim(const AnalysisEnginePtr &, const AbstractBasePtrList &args) { - auto context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context); - bool enable_sparse_flag = context->enable_sparse_flag(); - if (enable_sparse_flag) { - auto ret_abstract = AbstractEval(args); - if (ret_abstract != nullptr) { - MS_LOG(DEBUG) << "UniformPrimEvaluator eval Undetermined"; - return ret_abstract; - } + auto ret_abstract = AbstractEval(args); + if (ret_abstract != nullptr) { + MS_LOG(DEBUG) << "UniformPrimEvaluator eval Undetermined"; + return ret_abstract; } // if func_desc_.retval type is super class of parameter type, then make the retval type as parameter type. 
if (nargs_ != args.size()) { @@ -907,8 +906,6 @@ class RefToEmbedEvaluator : public SymbolicPrimEvaluator { auto ret = std::make_shared(type); auto ref_value = ref_abs->ref(); MS_EXCEPTION_IF_NULL(ref_value); - ret->set_sparse_grad(ref_value->sparse_grad()); - ret->set_has_indexed_slices_grad(ref_value->has_indexed_slices_grad()); return std::make_shared(ret, std::make_shared()); } @@ -923,8 +920,6 @@ class RefToEmbedEvaluator : public SymbolicPrimEvaluator { x = SensitivityTransform(x); std::shared_ptr key = std::make_shared(node, x); std::shared_ptr abs_scalar = std::make_shared(key, type); - abs_scalar->set_sparse_grad(x->sparse_grad()); - abs_scalar->set_has_indexed_slices_grad(x->has_indexed_slices_grad()); return std::make_shared(abs_scalar, std::make_shared()); } }; @@ -936,15 +931,10 @@ class GetAttrEvaluator : public TransitionPrimEvaluator { MS_DECLARE_PARENT(GetAttrEvaluator, TransitionPrimEvaluator); EvalResultPtr EvalPrim(const AnalysisEnginePtr &engine, const AbstractBasePtrList &args_spec_list, const ConfigPtr &in_conf0, const AnfNodeConfigPtr &out_conf) override { - auto context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context); - bool enable_sparse_flag = context->enable_sparse_flag(); - if (enable_sparse_flag) { - auto ret_abstract = AbstractEval(args_spec_list); - if (ret_abstract != nullptr) { - MS_LOG(DEBUG) << "GetAttrEvaluator eval Undetermined"; - return ret_abstract; - } + auto ret_abstract = AbstractEval(args_spec_list); + if (ret_abstract != nullptr) { + MS_LOG(DEBUG) << "GetAttrEvaluator eval Undetermined"; + return ret_abstract; } // Inputs: data, item if (args_spec_list.size() != 2) { diff --git a/mindspore/ccsrc/pipeline/static_analysis/prim.h b/mindspore/ccsrc/pipeline/jit/static_analysis/prim.h similarity index 99% rename from mindspore/ccsrc/pipeline/static_analysis/prim.h rename to mindspore/ccsrc/pipeline/jit/static_analysis/prim.h index 1346dba2a2b..692fbe66e88 100644 --- 
a/mindspore/ccsrc/pipeline/static_analysis/prim.h +++ b/mindspore/ccsrc/pipeline/jit/static_analysis/prim.h @@ -25,7 +25,7 @@ #include #include -#include "pipeline/static_analysis/evaluator.h" +#include "pipeline/jit/static_analysis/evaluator.h" namespace mindspore { namespace abstract { @@ -349,7 +349,6 @@ AbstractBasePtr InferImplControlDepend(const AnalysisEnginePtr &, const Primitiv AbstractBasePtr InferImplDebug(const AnalysisEnginePtr &, const PrimitivePtr &primitive, const AbstractBasePtrList &args_spec_list); -void InitUndeterminedFromEnv(const std::string &sparse_shape_types); AbstractBasePtr InferImplMakeIndexedSlices(const AnalysisEnginePtr &, const PrimitivePtr &primitive, const AbstractBasePtrList &args_spec_list); diff --git a/mindspore/ccsrc/pipeline/static_analysis/program_specialize.cc b/mindspore/ccsrc/pipeline/jit/static_analysis/program_specialize.cc similarity index 96% rename from mindspore/ccsrc/pipeline/static_analysis/program_specialize.cc rename to mindspore/ccsrc/pipeline/jit/static_analysis/program_specialize.cc index e01b98841ba..ad39190dc3c 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/program_specialize.cc +++ b/mindspore/ccsrc/pipeline/jit/static_analysis/program_specialize.cc @@ -16,14 +16,14 @@ * limitations under the License. 
*/ -#include "pipeline/static_analysis/program_specialize.h" +#include "pipeline/jit/static_analysis/program_specialize.h" #include #include #include "./common.h" -#include "operator/ops.h" -#include "operator/composite/do_signature.h" -#include "pipeline/static_analysis/abstract_function.h" +#include "frontend/operator/ops.h" +#include "frontend/operator/composite/do_signature.h" +#include "pipeline/jit/static_analysis/abstract_function.h" #include "utils/graph_utils.h" #include "utils/log_adapter.h" #include "utils/profile.h" @@ -321,7 +321,7 @@ AnfNodePtr FuncGraphSpecializer::BuildSpecializedNode(const AnfNodePtr &node, co AbstractFunctionPtr func = real_a->GetUnique(); SpecializeStatusCode errcode; ScopeGuard scope_guard(node->scope()); - AnfNodePtr repl = BuildSpecializedNodeInner(abs, func, argvals, &errcode); + AnfNodePtr repl = BuildSpecializedNodeInner(node, abs, func, argvals, &errcode); if (repl == nullptr) { if (errcode == kSpecializeFindUniqueArgvalDead) { const auto error_dead_node = std::make_shared(kDeadNode, node); @@ -340,7 +340,8 @@ AnfNodePtr FuncGraphSpecializer::BuildSpecializedNode(const AnfNodePtr &node, co return repl; } -AnfNodePtr FuncGraphSpecializer::BuildSpecializedNodeInner(const AbstractBasePtr &abs, const AbstractFunctionPtr &func, +AnfNodePtr FuncGraphSpecializer::BuildSpecializedNodeInner(const AnfNodePtr &node, const AbstractBasePtr &abs, + const AbstractFunctionPtr &func, const AbstractBasePtrList &args, SpecializeStatusCode *errcode) { MS_EXCEPTION_IF_NULL(abs); @@ -384,7 +385,14 @@ AnfNodePtr FuncGraphSpecializer::BuildSpecializedNodeInner(const AbstractBasePtr AnalysisContextPtr context = real_eval->MakeContext(engine_, argvals); MS_LOG(DEBUG) << "Specialize function graph: " << context->func_graph()->ToString() << ", args: " << argvals.size() << ", graph: " << context->func_graph()->get_return()->DebugString(); + if (context->func_graph()->stub()) { + MS_LOG(DEBUG) << "Specialize stub function graph, return the original 
node: " << context->func_graph()->ToString() + << ", args: " << argvals.size() << ", graph: " << context->func_graph()->get_return()->DebugString() + << ", " << node->ToString(); + return node; + } FuncGraphPtr v = specializer_->SpecializeFuncGraph(context->func_graph(), context); + v->set_flag(kFuncGraphFlagUndetermined, false); return BuildValueNode(v, abs); } @@ -613,7 +621,8 @@ SpecializeStatusCode FuncGraphSpecializer::FindUniqueArgvals(const AbstractFunct *result = std::make_pair(choices->begin()->first, choices->begin()->second->abstract()); return kSpecializeSuccess; } else if (choices->empty()) { - MS_LOG(DEBUG) << "Find DEAD code, it may be optimized in later phase."; + MS_LOG(DEBUG) << "Find DEAD code, it may be optimized in later phase " << func->ToString() << " | " + << func->type_name(); return kSpecializeFindUniqueArgvalDead; } else { if (IsPolyFunc(func, argvals)) { diff --git a/mindspore/ccsrc/pipeline/static_analysis/program_specialize.h b/mindspore/ccsrc/pipeline/jit/static_analysis/program_specialize.h similarity index 94% rename from mindspore/ccsrc/pipeline/static_analysis/program_specialize.h rename to mindspore/ccsrc/pipeline/jit/static_analysis/program_specialize.h index b04978586d9..d7f95be4cab 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/program_specialize.h +++ b/mindspore/ccsrc/pipeline/jit/static_analysis/program_specialize.h @@ -29,7 +29,7 @@ #include "ir/anf.h" #include "ir/func_graph_cloner.h" -#include "pipeline/static_analysis/evaluator.h" +#include "pipeline/jit/static_analysis/evaluator.h" namespace mindspore { namespace abstract { @@ -118,8 +118,9 @@ class FuncGraphSpecializer : public std::enable_shared_from_this #include -#include "pipeline/static_analysis/utils.h" -#include "pipeline/static_analysis/prim.h" -#include "operator/ops.h" +#include "abstract/utils.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "frontend/operator/ops.h" #include "utils/symbolic.h" #include "ir/tensor.h" #include 
"ir/func_graph_cloner.h" #include "./common.h" -#include "pipeline/parse/data_converter.h" +#include "pipeline/jit/parse/data_converter.h" #include "debug/draw.h" -#include "pipeline/static_analysis/evaluator.h" +#include "pipeline/jit/static_analysis/evaluator.h" #include "debug/trace.h" namespace mindspore { @@ -612,10 +612,34 @@ EvalResultPtr AnfNodeConfig::GetEvaluatedValue() { return engine_.lock()->GetEvaluatedValue(self); } +abstract::AbstractBasePtr MakeAbstractClosure(const FuncGraphPtr &func_graph, + const abstract::AnalysisContextPtr &context) { + AnalysisContextPtr temp_context = context; + if (temp_context == nullptr) { + temp_context = abstract::AnalysisContext::DummyContext(); + } + return std::make_shared(func_graph, temp_context); +} + +abstract::AbstractBasePtr MakeAbstractClosure(const MetaFuncGraphPtr &meta_func_graph, const AnfNodePtr &anf_node) { + abstract::MetaFuncGraphAbstractClosurePtr meta_func_graph_fn; + if (anf_node == nullptr) { + meta_func_graph_fn = std::make_shared(meta_func_graph); + } else { + meta_func_graph_fn = std::make_shared(meta_func_graph, anf_node->scope()); + } + return meta_func_graph_fn; +} + +abstract::AbstractBasePtr MakeAbstractClosure(const PrimitivePtr &primitive, const AnfNodePtr &anf_node) { + auto prim_func = std::make_shared(primitive, anf_node); + return prim_func; +} + AbstractBasePtr ToAbstract(const ValuePtr &value, const AnalysisContextPtr &context, const AnfNodeConfigPtr &conf) { if (value->isa()) { auto func_graph = value->cast(); - return func_graph->MakeAbstractClosure(context); + return MakeAbstractClosure(func_graph, context); } AnfNodePtr anf_node = nullptr; if (conf != nullptr) { @@ -623,11 +647,11 @@ AbstractBasePtr ToAbstract(const ValuePtr &value, const AnalysisContextPtr &cont } if (value->isa()) { auto meta_func_graph = value->cast(); - return meta_func_graph->MakeAbstractClosure(anf_node); + return MakeAbstractClosure(meta_func_graph, anf_node); } if (value->isa()) { auto prim = 
value->cast(); - return prim->ToPrimAbstract(anf_node); + return MakeAbstractClosure(prim, anf_node); } return value->ToAbstract(); } diff --git a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.h b/mindspore/ccsrc/pipeline/jit/static_analysis/static_analysis.h similarity index 98% rename from mindspore/ccsrc/pipeline/static_analysis/static_analysis.h rename to mindspore/ccsrc/pipeline/jit/static_analysis/static_analysis.h index a0b7ee54787..181696f7562 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.h +++ b/mindspore/ccsrc/pipeline/jit/static_analysis/static_analysis.h @@ -33,10 +33,10 @@ #include "utils/log_adapter.h" #include "ir/anf.h" -#include "ir/primitive.h" -#include "pipeline/static_analysis/analysis_context.h" -#include "pipeline/static_analysis/abstract_function.h" -#include "pipeline/parse/parse.h" +#include "ir/primitive_py.h" +#include "abstract/analysis_context.h" +#include "pipeline/jit/static_analysis/abstract_function.h" +#include "pipeline/jit/parse/parse.h" namespace mindspore { namespace abstract { diff --git a/mindspore/ccsrc/pipeline/validator.cc b/mindspore/ccsrc/pipeline/jit/validator.cc similarity index 97% rename from mindspore/ccsrc/pipeline/validator.cc rename to mindspore/ccsrc/pipeline/jit/validator.cc index bbca3c8721e..04aa6efd05b 100644 --- a/mindspore/ccsrc/pipeline/validator.cc +++ b/mindspore/ccsrc/pipeline/jit/validator.cc @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -#include "pipeline/validator.h" +#include "pipeline/jit/validator.h" #include #include @@ -24,7 +24,7 @@ #include "ir/manager.h" #include "ir/dtype.h" #include "./common.h" -#include "pipeline/static_analysis/prim.h" +#include "pipeline/jit/static_analysis/prim.h" namespace mindspore { namespace validator { diff --git a/mindspore/ccsrc/pipeline/validator.h b/mindspore/ccsrc/pipeline/jit/validator.h similarity index 97% rename from mindspore/ccsrc/pipeline/validator.h rename to mindspore/ccsrc/pipeline/jit/validator.h index 61f74703496..041448aed92 100644 --- a/mindspore/ccsrc/pipeline/validator.h +++ b/mindspore/ccsrc/pipeline/jit/validator.h @@ -23,7 +23,7 @@ #include #include #include -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "ir/anf.h" #include "utils/misc.h" diff --git a/mindspore/ccsrc/pynative/CMakeLists.txt b/mindspore/ccsrc/pipeline/pynative/CMakeLists.txt similarity index 84% rename from mindspore/ccsrc/pynative/CMakeLists.txt rename to mindspore/ccsrc/pipeline/pynative/CMakeLists.txt index 5139160774a..c15928ee768 100644 --- a/mindspore/ccsrc/pynative/CMakeLists.txt +++ b/mindspore/ccsrc/pipeline/pynative/CMakeLists.txt @@ -6,4 +6,4 @@ if (ENABLE_GE) endif () set_property(SOURCE ${_PYNATIVE_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PYNATIVE) -add_library(_mindspore_pynative_obj OBJECT ${_PYNATIVE_SRC_LIST}) +add_library(_mindspore_pipeline_pynative_obj OBJECT ${_PYNATIVE_SRC_LIST}) diff --git a/mindspore/ccsrc/pynative/base.h b/mindspore/ccsrc/pipeline/pynative/base.h similarity index 95% rename from mindspore/ccsrc/pynative/base.h rename to mindspore/ccsrc/pipeline/pynative/base.h index 60ae8692277..afb6d0982ba 100644 --- a/mindspore/ccsrc/pynative/base.h +++ b/mindspore/ccsrc/pipeline/pynative/base.h @@ -26,8 +26,8 @@ #include #include "pybind11/pybind11.h" -#include "ir/primitive.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "ir/primitive_py.h" +#include 
"abstract/abstract_value.h" namespace mindspore { namespace pynative { diff --git a/mindspore/ccsrc/pynative/pynative_execute.cc b/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc similarity index 89% rename from mindspore/ccsrc/pynative/pynative_execute.cc rename to mindspore/ccsrc/pipeline/pynative/pynative_execute.cc index f477bfbdcdd..5e3add1b5fb 100644 --- a/mindspore/ccsrc/pynative/pynative_execute.cc +++ b/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pynative/pynative_execute.h" +#include "pipeline/pynative/pynative_execute.h" #include #include @@ -24,33 +24,33 @@ #include "debug/trace.h" #include "ir/tensor_py.h" -#include "ir/param_value_py.h" +#include "ir/param_value.h" #include "utils/any.h" #include "utils/utils.h" #include "utils/context/ms_context.h" -#include "operator/ops.h" -#include "operator/composite/composite.h" -#include "operator/composite/do_signature.h" -#include "pipeline/parse/data_converter.h" -#include "pipeline/parse/parse_base.h" -#include "pipeline/parse/resolve.h" -#include "pipeline/static_analysis/prim.h" -#include "session/session_factory.h" -#include "pre_activate/pass/const_input_to_attr_registry.h" -#include "pre_activate/common/helper.h" -#include "pipeline/action.h" +#include "frontend/operator/ops.h" +#include "frontend/operator/composite/composite.h" +#include "frontend/operator/composite/do_signature.h" +#include "pipeline/jit/parse/data_converter.h" +#include "pipeline/jit/parse/parse_base.h" +#include "pipeline/jit/parse/resolve.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "backend/session/session_factory.h" +#include "backend/optimizer/pass/const_input_to_attr_registry.h" +#include "backend/optimizer/common/helper.h" +#include "pipeline/jit/action.h" -#include "pynative/base.h" +#include "pipeline/pynative/base.h" #include "pybind_api/api_register.h" #include "vm/transform.h" -#include "optimizer/ad/grad.h" -#include 
"pipeline/resource.h" -#include "pipeline/pipeline.h" -#include "pipeline/pass.h" +#include "frontend/optimizer/ad/grad.h" +#include "pipeline/jit/resource.h" +#include "pipeline/jit/pipeline.h" +#include "pipeline/jit/pass.h" #ifdef ENABLE_GE -#include "pynative/pynative_execute_ge.h" +#include "pipeline/pynative/pynative_execute_ge.h" #endif using mindspore::tensor::TensorPy; @@ -160,36 +160,83 @@ std::map> GetTypeIndex(const std::vector return type_indexes; } -std::map GetDstType(const py::tuple &py_args, +std::map GetDstType(const py::tuple &py_args, const std::map> &type_indexes) { - std::map dst_type; + std::map dst_type; for (auto it = type_indexes.begin(); it != type_indexes.end(); (void)++it) { auto type = it->first; auto indexes = it->second; - if (indexes.size() < 2) { + if (type == SignatureEnumDType::kDTypeEmptyDefaultValue || indexes.size() < 2) { continue; } - size_t m_index = indexes[0]; - for (size_t i = 1; i < indexes.size(); ++i) { - if (py::isinstance(py_args[indexes[i]])) { - m_index = indexes[i]; + size_t priority = 0; + TypeId max_type = TypeId::kTypeUnknown; + bool has_float = false; + bool has_int = false; + for (size_t index : indexes) { + if (!has_float && py::isinstance(py_args[index])) { + has_float = true; + } + if (!has_int && !py::isinstance(py_args[index]) && py::isinstance(py_args[index])) { + has_int = true; + } + if (py::isinstance(py_args[index])) { + auto arg = py::cast(py_args[index]); + TypeId arg_type_id = arg->data_type(); + auto type_priority = prim::type_map.find(arg_type_id); + if (type_priority == prim::type_map.end()) { + continue; + } + if (type_priority->second > priority) { + max_type = type_priority->first; + priority = type_priority->second; + } } } - (void)dst_type.insert(std::make_pair(type, m_index)); + if (max_type == TypeId::kNumberTypeBool) { + if (has_int) { + max_type = TypeId::kNumberTypeInt32; + } + if (has_float) { + max_type = TypeId::kNumberTypeFloat32; + } + } + 
(void)dst_type.insert(std::make_pair(type, max_type)); } return dst_type; } +std::string TypeIdToMsTypeStr(const TypeId &type_id) { + auto type_name = type_name_map.find(type_id); + if (type_name == type_name_map.end()) { + MS_LOG(EXCEPTION) << "For implicit type conversion, not support convert to the type: " << TypeIdToType(type_id); + } + return type_name->second; +} + +py::object DoAutoCast(const py::object &arg, const TypeId &type_id) { + py::tuple args(3); + std::string module_name = "mindspore.ops.functional"; + std::string op_name = "cast"; + args[0] = parse::python_adapter::GetPyFn(module_name, op_name); + args[1] = "Cast"; + + std::string dst_type_str = TypeIdToMsTypeStr(type_id); + module_name = "mindspore.common.dtype"; + py::object dst_type = parse::python_adapter::GetPyFn(module_name, dst_type_str); + py::tuple inputs(2); + inputs[0] = arg; + inputs[1] = dst_type; + args[2] = inputs; + + return RunOp(args)[0]; +} py::tuple ConvertInputs(const PrimitivePyPtr &prim, const py::list &args, py::tuple *const out_args, py::list *const out_args_list) { auto &py_args = *out_args; py::tuple input_mask(args.size()); for (size_t i = 0; i < args.size(); ++i) { - if (py::hasattr(args[i], "__parameter__")) { - input_mask[i] = true; - } else { - input_mask[i] = false; - } + input_mask[i] = py::hasattr(args[i], "__parameter__"); py_args[i] = GetTupleObj(args[i]); } auto signature = prim->signatures(); @@ -197,26 +244,33 @@ py::tuple ConvertInputs(const PrimitivePyPtr &prim, const py::list &args, py::tu (void)std::transform(signature.begin(), signature.end(), std::back_inserter(dtypes), [](const Signature &sig) { return sig.dtype; }); int empty_dtype_count = std::count(dtypes.begin(), dtypes.end(), SignatureEnumDType::kDTypeEmptyDefaultValue); - if (dtypes.size() == 0 || static_cast(dtypes.size()) == empty_dtype_count) { + if (dtypes.empty() || static_cast(dtypes.size()) == empty_dtype_count) { return input_mask; } auto type_indexes = GetTypeIndex(dtypes); auto dst_type 
= GetDstType(py_args, type_indexes); - for (size_t i = 0; i < py_args.size(); ++i) { - auto it = dst_type.find(dtypes[i]); - if (it != dst_type.end() && it->second != i && - (py::isinstance(py_args[i]) || py::isinstance(py_args[i]))) { - auto tensor_ptr = py::cast(py_args[it->second]); - if (py::isinstance(py_args[i])) { - py_args[i] = std::make_shared(py::cast(py_args[i]), tensor_ptr->Dtype()); - (*out_args_list)[i] = py_args[i]; - } else { - double arg_value = py::cast(py_args[i]); - py_args[i] = std::make_shared(arg_value, tensor_ptr->Dtype()); - (*out_args_list)[i] = py_args[i]; - } + + for (size_t i = 0; i < dtypes.size(); ++i) { + if (dtypes[i] == SignatureEnumDType::kDTypeEmptyDefaultValue) { continue; } + auto it = dst_type.find(dtypes[i]); + if (it == dst_type.end() || it->second == kTypeUnknown) { + continue; + } + if (py::isinstance(py_args[i])) { + auto arg = py::cast(py_args[i]); + if (arg->data_type() == it->second) { + continue; + } + if (signature[i].rw == SignatureEnumRW::kRWWrite) { + prim::RaiseExceptionForConvertRefDtype(prim->name(), TypeIdToMsTypeStr(arg->data_type()), + TypeIdToMsTypeStr(it->second)); + } + } + py::object cast_output = DoAutoCast(py_args[i], it->second); + (*out_args)[i] = cast_output; + (*out_args_list)[i] = cast_output; } return input_mask; } @@ -297,14 +351,13 @@ py::object RunOpInVM(const OpExecInfoPtr &op_exec_info, PynativeStatusCode *stat for (size_t i = 0; i < op_inputs.size(); i++) { py::object input = op_inputs[i]; if (py::hasattr(input, "__parameter__")) { - result[i] = py::getattr(input, "data"); - } else { - auto tensor = py::cast(input); - auto new_tensor = std::make_shared(tensor->data_type(), tensor->shape(), tensor->data_ptr()); - new_tensor->set_device_address(tensor->device_address()); - new_tensor->set_dirty(tensor->is_dirty()); - result[i] = new_tensor; + input = py::getattr(input, "data"); } + auto tensor = py::cast(input); + auto new_tensor = std::make_shared(tensor->data_type(), tensor->shape(), 
tensor->data_ptr()); + new_tensor->set_device_address(tensor->device_address()); + new_tensor->set_dirty(tensor->is_dirty()); + result[i] = new_tensor; } *status = PYNATIVE_SUCCESS; MS_LOG(INFO) << "RunOpInVM end"; @@ -754,7 +807,7 @@ AnfNodePtr PynativeExecutor::GetInput(const py::object &obj, const py::object &o if (graph_info_map_[df_builder_].param_map.count(obj_id) == 0) { auto free_param = df_builder_->add_parameter(); free_param->set_name(param_name); - auto free_param_new = std::make_shared(obj); + auto free_param_new = py::cast(obj.attr("_value")); free_param->set_default_param(free_param_new); free_param->debug_info()->set_name(param_name); MS_LOG(DEBUG) << "Top graph set free parameter " << obj_id; @@ -926,7 +979,7 @@ std::vector PynativeExecutor::GetWeightsArgs(const py::object &weigh } } } else { - MS_LOG(EXCEPTION) << "training not paramter_tuple"; + MS_LOG(DEBUG) << "training not paramter_tuple"; } return w_args; } @@ -950,8 +1003,9 @@ abstract::AbstractBasePtrList PynativeExecutor::GetArgsSpec(const py::args &args for (const auto ¶m : df_builder_->parameters()) { auto param_node = std::static_pointer_cast(param); if (param_node->has_default()) { - auto param_value = std::dynamic_pointer_cast(param_node->default_param()); - AbstractBasePtr ptr = abstract::FromValue(parse::data_converter::PyDataToValue(param_value->value()), true); + const auto ¶m_value = param_node->default_param(); + ValuePtr value = param_value->value(); + AbstractBasePtr ptr = abstract::FromValue(value, true); if (ptr == nullptr) { MS_LOG(EXCEPTION) << "Args convert error"; } diff --git a/mindspore/ccsrc/pynative/pynative_execute.h b/mindspore/ccsrc/pipeline/pynative/pynative_execute.h similarity index 97% rename from mindspore/ccsrc/pynative/pynative_execute.h rename to mindspore/ccsrc/pipeline/pynative/pynative_execute.h index 83cbea88d48..152d58aca44 100644 --- a/mindspore/ccsrc/pynative/pynative_execute.h +++ b/mindspore/ccsrc/pipeline/pynative/pynative_execute.h @@ -28,11 
+28,11 @@ #include "pybind11/pybind11.h" #include "pybind11/numpy.h" -#include "pynative/base.h" +#include "pipeline/pynative/base.h" #include "utils/context/ms_context.h" #include "ir/anf.h" -#include "pipeline/resource.h" -#include "operator/composite/composite.h" +#include "pipeline/jit/resource.h" +#include "frontend/operator/composite/composite.h" namespace mindspore { namespace pynative { diff --git a/mindspore/ccsrc/pynative/pynative_execute_ge.cc b/mindspore/ccsrc/pipeline/pynative/pynative_execute_ge.cc similarity index 98% rename from mindspore/ccsrc/pynative/pynative_execute_ge.cc rename to mindspore/ccsrc/pipeline/pynative/pynative_execute_ge.cc index 8e10468236d..897c21fc90c 100644 --- a/mindspore/ccsrc/pynative/pynative_execute_ge.cc +++ b/mindspore/ccsrc/pipeline/pynative/pynative_execute_ge.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pynative/pynative_execute_ge.h" +#include "pipeline/pynative/pynative_execute_ge.h" #include #include @@ -24,10 +24,10 @@ #include "utils/any.h" #include "utils/utils.h" #include "utils/context/ms_context.h" -#include "operator/ops.h" -#include "pipeline/parse/data_converter.h" -#include "pipeline/static_analysis/prim.h" -#include "session/session_factory.h" +#include "frontend/operator/ops.h" +#include "pipeline/jit/parse/data_converter.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "backend/session/session_factory.h" #include "ir/tensor_py.h" const char SINGLE_OP_GRAPH[] = "single_op_graph"; diff --git a/mindspore/ccsrc/pynative/pynative_execute_ge.h b/mindspore/ccsrc/pipeline/pynative/pynative_execute_ge.h similarity index 90% rename from mindspore/ccsrc/pynative/pynative_execute_ge.h rename to mindspore/ccsrc/pipeline/pynative/pynative_execute_ge.h index 2dca3df0187..2978278489a 100644 --- a/mindspore/ccsrc/pynative/pynative_execute_ge.h +++ b/mindspore/ccsrc/pipeline/pynative/pynative_execute_ge.h @@ -23,10 +23,10 @@ #include #include -#include "pynative/base.h" -#include 
"transform/convert.h" -#include "transform/graph_runner.h" -#include "transform/types.h" +#include "pipeline/pynative/base.h" +#include "transform/graph_ir/convert.h" +#include "transform/graph_ir/graph_runner.h" +#include "transform/graph_ir/types.h" #include "utils/context/ms_context.h" using GeTensor = ge::Tensor; diff --git a/mindspore/ccsrc/predict/converter/attr_utils/convert_util.h b/mindspore/ccsrc/predict/converter/attr_utils/convert_util.h index 5c7551a190b..612ccde1a51 100644 --- a/mindspore/ccsrc/predict/converter/attr_utils/convert_util.h +++ b/mindspore/ccsrc/predict/converter/attr_utils/convert_util.h @@ -25,7 +25,7 @@ #include #include #include "ir/tensor.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "predict/schema/inner/ms_generated.h" using TensorPtr = mindspore::tensor::TensorPtr; diff --git a/mindspore/ccsrc/predict/converter/kernel2ms.cc b/mindspore/ccsrc/predict/converter/kernel2ms.cc index 1b1277aadeb..04aceb62eb8 100644 --- a/mindspore/ccsrc/predict/converter/kernel2ms.cc +++ b/mindspore/ccsrc/predict/converter/kernel2ms.cc @@ -18,7 +18,7 @@ #include #include "ir/anf.h" #include "predict/converter/lite_model/op_attr_packer.h" -#include "mindspore/ccsrc/operator/ops.h" +#include "mindspore/ccsrc/frontend/operator/ops.h" namespace mindspore { namespace executor { diff --git a/mindspore/ccsrc/predict/converter/kernel2ms.h b/mindspore/ccsrc/predict/converter/kernel2ms.h index 7013f881078..8cbc89ed6a6 100644 --- a/mindspore/ccsrc/predict/converter/kernel2ms.h +++ b/mindspore/ccsrc/predict/converter/kernel2ms.h @@ -22,7 +22,7 @@ #include #include #include -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" #include "predict/converter/executor_tensor.h" #include "predict/schema/inner/ms_generated.h" #include "predict/converter/attr_utils/convert_util.h" diff --git a/mindspore/ccsrc/predict/converter/lite_model/op_attr_packer.h 
b/mindspore/ccsrc/predict/converter/lite_model/op_attr_packer.h index 89e38d1871f..31f14ef73a3 100644 --- a/mindspore/ccsrc/predict/converter/lite_model/op_attr_packer.h +++ b/mindspore/ccsrc/predict/converter/lite_model/op_attr_packer.h @@ -20,7 +20,7 @@ #include #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "predict/schema/inner/ms_generated.h" static constexpr size_t kNIndex = 0; diff --git a/mindspore/ccsrc/predict/predict.h b/mindspore/ccsrc/predict/predict.h index 7c65f166194..91254514921 100644 --- a/mindspore/ccsrc/predict/predict.h +++ b/mindspore/ccsrc/predict/predict.h @@ -19,7 +19,7 @@ #include #include -#include "session/session_basic.h" +#include "backend/session/session_basic.h" #include "predict/converter/kernel2ms.h" namespace mindspore { diff --git a/mindspore/ccsrc/device/CMakeLists.txt b/mindspore/ccsrc/runtime/device/CMakeLists.txt similarity index 96% rename from mindspore/ccsrc/device/CMakeLists.txt rename to mindspore/ccsrc/runtime/device/CMakeLists.txt index 652c04d4cd8..9c95aee0dca 100644 --- a/mindspore/ccsrc/device/CMakeLists.txt +++ b/mindspore/ccsrc/runtime/device/CMakeLists.txt @@ -62,4 +62,4 @@ endif () set_property(SOURCE ${DEVICE_SRC_LIST} ${D_SRC_LIST} ${CPU_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE) -add_library(_mindspore_device_obj OBJECT ${DEVICE_SRC_LIST} ${D_SRC_LIST} ${CPU_SRC_LIST}) +add_library(_mindspore_runtime_device_obj OBJECT ${DEVICE_SRC_LIST} ${D_SRC_LIST} ${CPU_SRC_LIST}) diff --git a/mindspore/ccsrc/device/ascend/ascend_device_address.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc similarity index 93% rename from mindspore/ccsrc/device/ascend/ascend_device_address.cc rename to mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc index c4b8717fa50..1a87f3e6afe 100644 --- a/mindspore/ccsrc/device/ascend/ascend_device_address.cc +++ 
b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "device/ascend/ascend_device_address.h" +#include "runtime/device/ascend/ascend_device_address.h" #include #include #include #include #include "runtime/mem.h" -#include "device/kernel_runtime_manager.h" -#include "device/convert_tensor_utils.h" +#include "runtime/device/kernel_runtime_manager.h" +#include "runtime/device/convert_tensor_utils.h" #include "ir/dtype/type.h" #include "ir/tensor.h" -#include "kernel/common_utils.h" +#include "backend/kernel_compiler/common_utils.h" #include "utils/utils.h" #include "common/utils.h" #include "common/trans.h" @@ -303,12 +303,22 @@ bool AscendDeviceAddress::ConvertFormatAndSyncHostToDevice(const std::vector(ptr_) - kMemAlignSize; +} + AscendDeviceAddress::~AscendDeviceAddress() { if (ptr_ == nullptr) { return; } if (from_mem_pool_) { - AscendMemoryPool::GetInstance().FreeTensorMem(ptr_); + if (communication_ptr_ != nullptr) { + AscendMemoryPool::GetInstance().FreeTensorMem(communication_ptr_); + communication_ptr_ = nullptr; + } else { + AscendMemoryPool::GetInstance().FreeTensorMem(ptr_); + } ptr_ = nullptr; } } @@ -360,12 +370,15 @@ bool AscendDeviceAddress::DumpMemToFile(bool trans_flag, const std::string &file #ifdef ENABLE_DEBUGGER bool AscendDeviceAddress::LoadMemToHost(bool trans_flag, const std::string &tensor_name, int execution_order, const std::string &host_fmt, const std::vector &host_shape, - TypeId host_type, size_t slot, Debugger *debugger) const { + TypeId host_type, size_t slot, Debugger *debugger, bool keep_prev) const { bool ret = false; - - DebugServices *debug_services = debugger->get_debug_services(); - TensorLoader *tensor_loader = debug_services->get_tensor_loader(); - + DebugServices *debug_services = debugger->debug_services(); + TensorLoader *tensor_loader = debug_services->tensor_loader(); + // 
TensorData is freed up in AscendSession class + auto tensor_data = std::make_shared(); + tensor_data->SetName(tensor_name); + tensor_data->SetExecutionOrder(execution_order); + tensor_data->SetSlot(slot); if (trans_flag) { MS_LOG(INFO) << "E2E tensor name is " << tensor_name; mindspore::tensor::TensorPtr out_tensor = std::make_shared(host_type, host_shape); @@ -375,28 +388,18 @@ bool AscendDeviceAddress::LoadMemToHost(bool trans_flag, const std::string &tens MS_LOG(ERROR) << "Copy device mem to host failed"; return ret; } - auto tensor_data = std::make_shared(); - tensor_data->SetName(tensor_name); - tensor_data->SetExecutionOrder(execution_order); tensor_data->SetTensor(out_tensor); - tensor_data->SetSlot(slot); - ret = tensor_loader->LoadNewTensor(tensor_data); } else { mindspore::tensor::TensorPtr out_tensor = std::make_shared(type_id_, host_shape); size_t host_size = out_tensor->data().nbytes(); auto ret_rt_memcpy = rtMemcpy(out_tensor->data_c(), host_size, ptr_, host_size, RT_MEMCPY_DEVICE_TO_HOST); - - auto tensor_data = std::make_shared(); - tensor_data->SetName(tensor_name); - tensor_data->SetExecutionOrder(execution_order); - tensor_data->SetTensor(out_tensor); - tensor_data->SetSlot(slot); - ret = tensor_loader->LoadNewTensor(tensor_data); if (ret_rt_memcpy != RT_ERROR_NONE) { MS_LOG(ERROR) << "SyncDeviceToHost: rtMemcpy mem size[" << size_ << "] fail, ret[" << ret_rt_memcpy << "]"; } MS_LOG(INFO) << "E2E tensor name is " << tensor_name; + tensor_data->SetTensor(out_tensor); } + ret = tensor_loader->LoadNewTensor(tensor_data, keep_prev); return ret; } #endif diff --git a/mindspore/ccsrc/device/ascend/ascend_device_address.h b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.h similarity index 90% rename from mindspore/ccsrc/device/ascend/ascend_device_address.h rename to mindspore/ccsrc/runtime/device/ascend/ascend_device_address.h index 16b9f7817a9..78d7006b566 100644 --- a/mindspore/ccsrc/device/ascend/ascend_device_address.h +++ 
b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.h @@ -20,8 +20,8 @@ #include #include #include -#include "device/device_address.h" -#include "device/ascend/ascend_memory_pool.h" +#include "runtime/device/device_address.h" +#include "runtime/device/ascend/ascend_memory_pool.h" #include "ir/dtype.h" namespace mindspore { @@ -39,13 +39,15 @@ class AscendDeviceAddress : public DeviceAddress { bool SyncDeviceToHost(const std::vector &shape, size_t size, TypeId type, void *host_ptr) const override; bool SyncHostToDevice(const std::vector &shape, size_t size, TypeId type, const void *host_ptr) const override; DeviceAddressType DeviceType() const override { return DeviceAddressType::kAscend; } + void UpdateCommunicationAddress() override; #ifdef ENABLE_DUMP_E2E bool DumpMemToFile(bool dump_mode, const std::string &filepath, const std::string &host_fmt, const std::vector &host_shape, TypeId host_type) const; #endif #ifdef ENABLE_DEBUGGER bool LoadMemToHost(bool dump_mode, const std::string &tensor_name, int execution_order, const std::string &host_fmt, - const std::vector &host_shape, TypeId host_type, size_t slot, Debugger *debugger) const; + const std::vector &host_shape, TypeId host_type, size_t slot, Debugger *debugger, + bool keep_prev) const; #endif private: @@ -53,6 +55,7 @@ class AscendDeviceAddress : public DeviceAddress { bool ConvertFormatAndSyncHostToDevice(const std::vector &shape, size_t size, TypeId type, const void *host_ptr) const; void SyncStream() const; + uint8_t *communication_ptr_{nullptr}; }; using AscendDeviceAddressPtr = std::shared_ptr; } // namespace ascend diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc similarity index 87% rename from mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc rename to mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc index efdcb987552..3ab3a52d42f 100644 --- 
a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc @@ -14,34 +14,35 @@ * limitations under the License. */ #define PATH_MAX 0x3ffff -#include "device/ascend/ascend_kernel_runtime.h" +#include "runtime/device/ascend/ascend_kernel_runtime.h" #include #include #include #include #include #include -#include "device/ascend/ascend_device_address.h" -#include "device/cpu/mpi/mpi_adapter.h" +#include "runtime/device/ascend/ascend_device_address.h" +#include "runtime/device/cpu/mpi/mpi_adapter.h" #include "utils/context/ms_context.h" #include "utils/mpi/mpi_config.h" -#include "device/ascend/profiling/profiling_manager.h" +#include "runtime/device/ascend/profiling/profiling_manager.h" #include "hccl/hcom.h" #include "common/trans.h" #include "runtime/context.h" -#include "device/ascend/ascend_label_assign.h" -#include "device/ascend/ascend_stream_assign.h" -#include "device/ascend/ascend_memory_pool.h" +#include "runtime/device/ascend/ascend_label_assign.h" +#include "runtime/device/ascend/ascend_stream_assign.h" +#include "runtime/device/ascend/ascend_memory_pool.h" #include "framework/ge_runtime/model_runner.h" -#include "device/ascend/tasksink/task_generator.h" -#include "session/anf_runtime_algorithm.h" -#include "device/ascend/profiling/profiling_utils.h" -#include "kernel/tbe/tbe_utils.h" -#include "kernel/tbe/tbe_python_funcs.h" -#include "pre_activate/mem_reuse/mem_reuse_checker.h" -#include "device/ascend/ascend_memory_manager.h" +#include "runtime/device/ascend/tasksink/task_generator.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/ascend/profiling/profiling_utils.h" +#include "backend/kernel_compiler/tbe/tbe_utils.h" +#include "backend/kernel_compiler/tbe/tbe_python_funcs.h" +#include "backend/optimizer/mem_reuse/mem_reuse_checker.h" +#include "runtime/device/ascend/ascend_memory_manager.h" #include "debug/tensor_load.h" +using ge::model_runner::ModelRunner; 
using mindspore::device::ascend::ProfilingManager; using mindspore::device::ascend::ProfilingUtils; using mindspore::device::ascend::tasksink::TaskGenerator; @@ -90,9 +91,16 @@ std::string GetRankId() { AscendKernelRuntime::~AscendKernelRuntime() { graph_model_map_.clear(); } void AscendKernelRuntime::ClearGraphModelMap() { +#ifdef ENABLE_DATA_DUMP + for (auto &iter : graph_data_dumper_) { + MS_LOG(INFO) << "[DataDump] Unload data dumper:" << iter.first; + iter.second->UnloadDumpInfo(); + } + graph_data_dumper_.clear(); +#endif for (auto &iter : graph_model_map_) { MS_LOG(INFO) << "Ge UnloadModel " << iter.first; - auto ret = ge::model_runner::ModelRunner::Instance().UnloadModel(iter.first); + auto ret = ModelRunner::Instance().UnloadModel(iter.first); if (!ret) { MS_LOG(ERROR) << "UnloadModel failed"; } @@ -107,7 +115,7 @@ void AscendKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) { return; } MS_LOG(DEBUG) << "Ge UnloadModel " << iter->first; - auto ret = ge::model_runner::ModelRunner::Instance().UnloadModel(iter->first); + auto ret = ModelRunner::Instance().UnloadModel(iter->first); if (!ret) { MS_LOG(ERROR) << "UnloadModel failed"; } @@ -159,6 +167,10 @@ bool AscendKernelRuntime::Init() { } #endif +#ifdef ENABLE_DATA_DUMP + DataDumpParser::GetInstance().ParseDumpConfig(); +#endif + // Start up profiling before rtSetDevice ret = ProfilingManager::GetInstance().StartupProfiling(device_id_); if (!ret) { @@ -299,15 +311,24 @@ bool AscendKernelRuntime::DumpData(mindspore::session::KernelGraph *graph) { namespace { void LoadOutput(mindspore::session::KernelGraph *graph, Debugger *debugger) { MS_EXCEPTION_IF_NULL(graph); + // trans_flag: "true" means tensor values will be transfered to host format, otherwise not. 
bool trans_flag = false; const auto &apply_kernels = graph->execution_order(); // for kernels, execution order starts from 1 int exec_order = 1; + auto debugger_ = mindspore::Debugger::GetInstance(); + DebugServices *debug_services = debugger_->debug_services(); + auto watchpoint_table = debug_services->GetWatchpointTable(); for (const auto &node : apply_kernels) { MS_EXCEPTION_IF_NULL(node); auto node_name = AnfAlgo::GetCNodeName(node); std::string kernel_name = node->fullname_with_scope(); auto output_size = AnfAlgo::GetOutputTensorNum(node); + if (debugger_->partial_memory()) { + if (!debug_services->IsWatchPoint(kernel_name, watchpoint_table)) { + continue; + } + } for (size_t j = 0; j < output_size; ++j) { auto addr = AnfAlgo::GetOutputAddr(node, j); auto type = AnfAlgo::GetOutputInferDataType(node, j); @@ -322,7 +343,8 @@ void LoadOutput(mindspore::session::KernelGraph *graph, Debugger *debugger) { (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes), [](size_t inner_item) { return SizeToInt(inner_item); }); } - auto ret = ascend_addr->LoadMemToHost(trans_flag, tensor_name, exec_order, format, int_shapes, type, j, debugger); + auto ret = + ascend_addr->LoadMemToHost(trans_flag, tensor_name, exec_order, format, int_shapes, type, j, debugger, false); if (!ret) { MS_LOG(ERROR) << "LoadMemToHost: flag:" << trans_flag << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!"; @@ -334,6 +356,7 @@ void LoadOutput(mindspore::session::KernelGraph *graph, Debugger *debugger) { void LoadParameters(mindspore::session::KernelGraph *graph, Debugger *debugger) { MS_EXCEPTION_IF_NULL(graph); + // trans_flag: "true" means tensor values will be transfered to host format, otherwise not. 
bool trans_flag = false; const auto ¶meters = graph->inputs(); // for parameters, set its execution order to be 0; @@ -356,7 +379,8 @@ void LoadParameters(mindspore::session::KernelGraph *graph, Debugger *debugger) (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes), [](size_t inner_item) { return SizeToInt(inner_item); }); } - auto ret = ascend_addr->LoadMemToHost(trans_flag, tensor_name, exec_order, format, int_shapes, type, 0, debugger); + auto ret = + ascend_addr->LoadMemToHost(trans_flag, tensor_name, exec_order, format, int_shapes, type, 0, debugger, true); if (!ret) { MS_LOG(ERROR) << "LoadMemToHost Failed: flag:" << trans_flag << ", path:" << tensor_name << ", host_format:" << format << ".!"; @@ -438,7 +462,7 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) { << ", wait_active_stream_list size:" << wait_active_stream_list.size() << ", force_copy_stream_list size:" << force_copy_stream_list.size(); std::vector> empty_list; - std::shared_ptr model = std::make_shared( + auto model = std::make_shared( task_info_list, empty_list, empty_list, empty_list, empty_list, wait_active_stream_list, force_copy_stream_list, 0, 0, 0, 0, 0, 0, resource_manager.get_cur_stream_num(), label_assign_instance.GetLabelNum(NOT_NULL(graph)), resource_manager.get_cur_event_num(), 0); @@ -475,21 +499,45 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) { std::shared_ptr listener; MS_LOG(INFO) << "LoadDavinciModel mode_id:" << model_iter->first; - bool status = ge::model_runner::ModelRunner::Instance().LoadDavinciModel(device_id_, 0, model_iter->first, - model_iter->second, listener); + bool status = + ModelRunner::Instance().LoadDavinciModel(device_id_, 0, model_iter->first, model_iter->second, listener); if (!status) { MS_LOG(EXCEPTION) << "Load Task Failed"; } if (ProfilingManager::GetInstance().IsProfiling()) { - auto task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(model_iter->first); - auto 
stream_ids = ge::model_runner::ModelRunner::Instance().GetStreamIdList(model_iter->first); + auto task_ids = ModelRunner::Instance().GetTaskIdList(model_iter->first); + auto stream_ids = ModelRunner::Instance().GetStreamIdList(model_iter->first); ProfilingUtils::ReportProfilingData(task_ids, stream_ids, NOT_NULL(graph)); } + +#ifdef ENABLE_DATA_DUMP + LaunchDataDump(NOT_NULL(graph)); +#endif + if (!ModelRunner::Instance().LoadModelComplete(model_iter->first)) { + MS_LOG(ERROR) << "Call ge runtime LoadModelComplete failed"; + return false; + } return true; } +#ifdef ENABLE_DATA_DUMP +void AscendKernelRuntime::LaunchDataDump(NotNull graph) { + if (!DataDumpParser::GetInstance().DumpEnabled()) { + return; + } + auto runtime_info_map = ModelRunner::Instance().GetRuntimeInfoMap(graph->graph_id()); + auto data_dumper = std::make_shared(graph.get(), runtime_info_map); + MS_EXCEPTION_IF_NULL(data_dumper); + data_dumper->LoadDumpInfo(); + auto ret = graph_data_dumper_.try_emplace(graph->graph_id(), data_dumper); + if (!ret.second) { + MS_LOG(WARNING) << "[DataDump] Insert graphId:" << graph->graph_id() << " data dumper failed"; + } +} +#endif + void AscendKernelRuntime::DebugTaskIdName(GraphId graph_id) { - auto task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(graph_id); + auto task_ids = ModelRunner::Instance().GetTaskIdList(graph_id); auto graph_task_names = ProfilingUtils::graph_kernel_name(); auto iter = graph_task_names.find(graph_id); if (iter != graph_task_names.end()) { @@ -522,7 +570,7 @@ bool AscendKernelRuntime::RunTask(const session::KernelGraph *graph) { return false; } - bool status = ge::model_runner::ModelRunner::Instance().RunModel(graph->graph_id(), input_tensors, output_tensors); + bool status = ModelRunner::Instance().RunModel(graph->graph_id(), input_tensors, output_tensors); if (!status) { MS_LOG(ERROR) << "Run task failed"; DebugTaskIdName(graph->graph_id()); diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h 
b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h similarity index 86% rename from mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h rename to mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h index 69ba8b295a8..4f1663d4d5a 100644 --- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h @@ -19,11 +19,15 @@ #include #include #include -#include "device/kernel_runtime.h" +#include "runtime/device/kernel_runtime.h" #include "runtime/context.h" #include "framework/ge_runtime/davinci_model.h" -#include "device/kernel_runtime_manager.h" -#include "session/session_basic.h" +#include "runtime/device/kernel_runtime_manager.h" +#include "backend/session/session_basic.h" +#ifdef ENABLE_DATA_DUMP +#include "debug/data_dump_parser.h" +#include "runtime/device/ascend/dump/data_dumper.h" +#endif using ge::model_runner::TaskInfo; using std::unordered_map; @@ -66,6 +70,10 @@ class AscendKernelRuntime : public KernelRuntime { bool initialized_{false}; unordered_map>> task_map_; unordered_map> graph_model_map_; +#ifdef ENABLE_DATA_DUMP + void LaunchDataDump(NotNull graph); + unordered_map> graph_data_dumper_; +#endif }; MS_REG_KERNEL_RUNTIME(kAscendDevice, AscendKernelRuntime); diff --git a/mindspore/ccsrc/device/ascend/ascend_label_assign.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_label_assign.cc similarity index 98% rename from mindspore/ccsrc/device/ascend/ascend_label_assign.cc rename to mindspore/ccsrc/runtime/device/ascend/ascend_label_assign.cc index 2db81a1725b..035f4dd8e30 100644 --- a/mindspore/ccsrc/device/ascend/ascend_label_assign.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_label_assign.cc @@ -17,8 +17,8 @@ #include #include #include -#include "device/ascend/ascend_label_assign.h" -#include "session/anf_runtime_algorithm.h" +#include "runtime/device/ascend/ascend_label_assign.h" +#include "backend/session/anf_runtime_algorithm.h" static constexpr uint32_t 
kLabelGotoLabelId = 1; static constexpr uint32_t kLabelSwitchLabelId = 2; diff --git a/mindspore/ccsrc/device/ascend/ascend_label_assign.h b/mindspore/ccsrc/runtime/device/ascend/ascend_label_assign.h similarity index 97% rename from mindspore/ccsrc/device/ascend/ascend_label_assign.h rename to mindspore/ccsrc/runtime/device/ascend/ascend_label_assign.h index 98055576eb9..6b09f2940e9 100644 --- a/mindspore/ccsrc/device/ascend/ascend_label_assign.h +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_label_assign.h @@ -19,7 +19,7 @@ #include #include -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" #include "utils/contract.h" namespace mindspore { diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc new file mode 100644 index 00000000000..f9da0850c6c --- /dev/null +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc @@ -0,0 +1,137 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include "runtime/device/ascend/ascend_memory_manager.h" +#include "runtime/device/ascend/ascend_memory_pool.h" +#include "utils/context/ms_context.h" +#include "runtime/mem.h" +namespace mindspore { +namespace device { +namespace ascend { +constexpr uint64_t kAscendDeviceMemGB = 30; +constexpr uint64_t kMemSizeGB = 30; +constexpr uint64_t kAscendDeviceMemSize = (kAscendDeviceMemGB << kMemSizeGB); + +void AscendMemoryManager::MallocDeviceMemory() { + auto context_mem = GetDeviceMemSizeFromContext(); + device_mem_size_ = context_mem == 0 ? kAscendDeviceMemSize : context_mem; + dynamic_mem_offset_ = device_mem_size_; + auto ret = rtMalloc(reinterpret_cast(&device_mem_base_), dynamic_mem_offset_, RT_MEMORY_HBM); + + if (ret != RT_ERROR_NONE) { + MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << dynamic_mem_offset_ << "] fail, ret[" << ret << "]"; + } + + AscendMemoryPool::GetInstance().set_device_mem_pool_base(device_mem_base_); + AscendMemoryPool::GetInstance().set_graph_dynamic_mem_offset(dynamic_mem_offset_); +} + +uint64_t AscendMemoryManager::GetDeviceMemSizeFromContext() { + auto context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context); + auto variable_memory_max_size = context->variable_memory_max_size(); + if (variable_memory_max_size == "0") { + return 0; + } + MS_LOG(INFO) << "context variable_memory_max_size:" << variable_memory_max_size; + auto pos = variable_memory_max_size.find('*'); + if (pos == std::string::npos) { + MS_LOG(EXCEPTION) << "Invalid variable_memory_max_size"; + } + auto gb_str = variable_memory_max_size.substr(0, pos); + auto gb_var = std::stoull(gb_str); + MS_LOG(INFO) << "variable_memory_max_size(GB):" << gb_var; + if (gb_var > kAscendDeviceMemGB || gb_var == 0) { + MS_LOG(EXCEPTION) << "Invalid allocate memory size:" << gb_var << " which should be in (0-30]GB"; + } + return gb_var << kMemSizeGB; +} + +void AscendMemoryManager::FreeDeviceMemory() { + if (device_mem_base_ != nullptr) { + auto ret = 
rtFree(device_mem_base_); + if (ret != RT_ERROR_NONE) { + MS_LOG(ERROR) << "rtFree mem size[" << device_mem_size_ << "] fail, ret[" << ret << "]"; + } + device_mem_base_ = nullptr; + } + if (device_mem_pool_base_ != nullptr) { + auto ret = rtFree(device_mem_pool_base_); + if (ret != RT_ERROR_NONE) { + MS_LOG(ERROR) << "rtFree mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]"; + } + device_mem_pool_base_ = nullptr; + } +} + +void AscendMemoryManager::ResetDynamicMemory() { + total_dynamic_size_ = 0; + dynamic_mem_offset_ = device_mem_size_; + AscendMemoryPool::GetInstance().set_graph_dynamic_mem_offset(dynamic_mem_offset_); +} + +void *AscendMemoryManager::MallocMemFromMemPool(size_t size) { + auto align_size = GetCommonAlignSize(size); + return AscendMemoryPool::GetInstance().AllocTensorMem(align_size); +} + +uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_mem) { + size_t align_size = 0; + if (communication_mem) { + align_size = GetCommunicationAlignSize(size); + } else { + align_size = GetCommonAlignSize(size); + } + if (communication_mem) { + // create protect area [kMemAlignSize -- data -- kMemAlignSize] + uint8_t *alloc_address = reinterpret_cast(AscendMemoryPool::GetInstance().AllocTensorMem(align_size)); + return alloc_address + kMemAlignSize; + } else { + return reinterpret_cast(AscendMemoryPool::GetInstance().AllocTensorMem(align_size)); + } +} + +uint8_t *AscendMemoryManager::MallocDynamicMem(size_t size, bool communication_mem) { + size_t align_size = 0; + if (communication_mem) { + align_size = GetCommunicationAlignSize(size); + } else { + align_size = GetCommonAlignSize(size); + } + if (dynamic_mem_offset_ < align_size) { + MS_LOG(EXCEPTION) << "Out of memory!!! 
total[" << device_mem_size_ << "] (dynamic[" << total_dynamic_size_ + << "]) malloc [" << align_size << "] failed!"; + } + auto new_offset = dynamic_mem_offset_ - align_size; + auto device_mem_pool_offset = AscendMemoryPool::GetInstance().device_mem_pool_offset(); + if (new_offset <= device_mem_pool_offset) { + MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "] (dynamic[" << total_dynamic_size_ + << "] memory pool[" << device_mem_pool_offset << "])" + << " malloc [" << align_size << "] failed!"; + } + total_dynamic_size_ += align_size; + dynamic_mem_offset_ = new_offset; + AscendMemoryPool::GetInstance().set_graph_dynamic_mem_offset(dynamic_mem_offset_); + if (communication_mem) { + // create protect area [kMemAlignSize -- data -- kMemAlignSize] + return device_mem_base_ + new_offset + kMemAlignSize; + } else { + return device_mem_base_ + new_offset; + } +} +} // namespace ascend +} // namespace device +} // namespace mindspore diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_manager.h b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.h similarity index 84% rename from mindspore/ccsrc/device/ascend/ascend_memory_manager.h rename to mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.h index 7fdd8f553e5..720f15be008 100644 --- a/mindspore/ccsrc/device/ascend/ascend_memory_manager.h +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.h @@ -16,7 +16,7 @@ #ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_MANAGER_H_ #define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_MANAGER_H_ -#include "device/memory_manager.h" +#include "runtime/device/memory_manager.h" namespace mindspore { namespace device { namespace ascend { @@ -27,8 +27,13 @@ class AscendMemoryManager : public MemoryManager { void MallocDeviceMemory() override; void FreeDeviceMemory() override; + void ResetDynamicMemory() override; void *MallocMemFromMemPool(size_t size) override; + protected: + uint8_t *MallocStaticMem(size_t 
size, bool communication_mem) override; + uint8_t *MallocDynamicMem(size_t size, bool communication_mem) override; + private: uint8_t *device_mem_pool_base_{nullptr}; uint64_t device_mem_pool_size_{0}; diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.cc new file mode 100644 index 00000000000..fe71ba43fc4 --- /dev/null +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.cc @@ -0,0 +1,75 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "runtime/device/ascend/ascend_memory_pool.h" +#include "runtime/device/ascend/ascend_kernel_runtime.h" +#include "utils/log_adapter.h" + +namespace mindspore { +namespace device { +namespace ascend { +size_t AscendMemoryPool::AllocDeviceMem(size_t size, DeviceMemPtr *addr) { + if (size == 0) { + MS_LOG(EXCEPTION) << "Can not alloc memory size(0) in memory pool !"; + } + if (device_mem_pool_offset_ + size >= graph_dynamic_mem_offset_) { + MS_LOG(EXCEPTION) << "Failed to alloc memory pool memory, the current device_mem_pool_offset_ [" + << device_mem_pool_offset_ << "], current graph_dynamic_mem_offset_ " << graph_dynamic_mem_offset_ + << "], need memory size [" << size << "]"; + } + *addr = device_mem_pool_base_ + device_mem_pool_offset_; + device_mem_pool_offset_ += size; + if (*addr == nullptr) { + MS_LOG(EXCEPTION) << "Alloc device address is nullptr, failed to alloc memory pool memory!"; + } + return size; +} + +bool AscendMemoryPool::FreeDeviceMem(const DeviceMemPtr &addr) { + MS_EXCEPTION_IF_NULL(addr); + return true; +} + +size_t AscendMemoryPool::AlignMemorySize(size_t size) const { + if (size == 0) { + MS_LOG(EXCEPTION) << "The align memory size is a zero !"; + } + return size; +} + +void AscendMemoryPool::set_device_mem_pool_base(uint8_t *device_mem_pool_base) { + MS_EXCEPTION_IF_NULL(device_mem_pool_base); + device_mem_pool_base_ = device_mem_pool_base; +} + +void AscendMemoryPool::set_graph_dynamic_mem_offset(uint64_t graph_dynamic_mem_offset) { + graph_dynamic_mem_offset_ = graph_dynamic_mem_offset; +} + +uint64_t AscendMemoryPool::device_mem_pool_offset() const { return device_mem_pool_offset_; } + +size_t AscendMemoryPool::free_mem_size() { + if (graph_dynamic_mem_offset_ < device_mem_pool_offset_) { + MS_LOG(EXCEPTION) << "graph dynamic mem offset [" << graph_dynamic_mem_offset_ + << "] less than device mem pool offset [" << device_mem_pool_offset_ << "]!"; + } + return graph_dynamic_mem_offset_ - device_mem_pool_offset_; +} + 
+size_t AscendMemoryPool::total_mem_size() { return graph_dynamic_mem_offset_ == 0 ? 0 : graph_dynamic_mem_offset_ - 1; } +} // namespace ascend +} // namespace device +} // namespace mindspore diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_pool.h b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.h similarity index 78% rename from mindspore/ccsrc/device/ascend/ascend_memory_pool.h rename to mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.h index 7fa3ebc23e8..7a75198ab4d 100644 --- a/mindspore/ccsrc/device/ascend/ascend_memory_pool.h +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.h @@ -18,7 +18,7 @@ #define MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_POOL_H_ #include -#include "pre_activate/mem_reuse/mem_dynamic_allocator.h" +#include "backend/optimizer/mem_reuse/mem_dynamic_allocator.h" namespace mindspore { namespace device { @@ -32,11 +32,9 @@ class AscendMemoryPool : public DynamicMemPoolBestFit { size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) override; bool FreeDeviceMem(const DeviceMemPtr &addr) override; void set_device_mem_pool_base(uint8_t *device_mem_pool_base); - void set_device_mem_pool_size(uint64_t device_mem_pool_size) { - device_mem_pool_size_ = device_mem_pool_size; - free_mem_size_ = device_mem_pool_size_; - total_mem_size_ = free_mem_size_; - } + void set_graph_dynamic_mem_offset(uint64_t graph_dynamic_mem_offset); + + uint64_t device_mem_pool_offset() const; size_t free_mem_size() override; size_t total_mem_size() override; @@ -48,16 +46,12 @@ class AscendMemoryPool : public DynamicMemPoolBestFit { protected: // The real size by memory alloc aligned. size_t AlignMemorySize(size_t size) const override; - // Get the minimum memory unit size using for dynamic extend. 
- size_t mem_alloc_unit_size() const override; private: AscendMemoryPool() = default; - bool has_malloc_{false}; uint8_t *device_mem_pool_base_{nullptr}; - uint64_t device_mem_pool_size_{0}; - size_t free_mem_size_{0}; - size_t total_mem_size_{0}; + uint64_t device_mem_pool_offset_{0}; + uint64_t graph_dynamic_mem_offset_{0}; }; } // namespace ascend } // namespace device diff --git a/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc similarity index 76% rename from mindspore/ccsrc/device/ascend/ascend_stream_assign.cc rename to mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc index 736d6203e9c..7cf5b94d45c 100644 --- a/mindspore/ccsrc/device/ascend/ascend_stream_assign.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "device/ascend/ascend_stream_assign.h" +#include "runtime/device/ascend/ascend_stream_assign.h" #include #include @@ -22,10 +22,10 @@ #include "ir/manager.h" #include "utils/context/ms_context.h" #include "common/utils.h" -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_adjust.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_adjust.h" #include "predict/generator/utils/ir_model_util.h" -#include "pre_activate/common/helper.h" +#include "backend/optimizer/common/helper.h" #include "utils/utils.h" namespace mindspore { @@ -48,6 +48,12 @@ void AscendStreamAssign::AssignStream(const NotNull &graph_ptr) CheckResourceAssign(graph_ptr); MS_LOG(INFO) << "After finish stream assign"; + FindStreamRelations(graph_ptr); + PrintStreamRelations(); + GetStreamRelations(); + PrintStreamGroups(); + FindEventRelations(graph_ptr); + // Get info for D Model AscendResourceMng &resource_manager = AscendResourceMng::GetInstance(); generator::IRModelUtil::GetInstance().set_event_num(resource_manager.get_cur_event_num()); @@ -501,6 +507,8 @@ void 
AscendStreamAssign::InsertEventHcomDependCommon(const NotNull *group) { + auto group_size = group->size(); + if (group_size == 0) { + return false; + } + for (const auto &item : stream_groups_) { + if (item.size() < group->size()) { + continue; + } + + bool flag = true; + for (size_t i = 0; i < group_size; i++) { + if (item[i] != group->at(i)) { + flag = false; + break; + } + } + + if (flag) { + return true; + } else { + continue; + } + } + + return false; +} + +void AscendStreamAssign::DFS(uint32_t start, std::vector *group) { + auto it = stream_relations_.find(start); + if (it == stream_relations_.end()) { + if (!IsVecExist(group)) { + stream_groups_.emplace_back(*group); + } else { + MS_LOG(WARNING) << "DFS should not print this log"; + } + return; + } + + vector active_streams = stream_relations_[start]; + + for (const auto &item : active_streams) { + group->emplace_back(item); + DFS(item, group); + group->pop_back(); + } +} + +void AscendStreamAssign::GetStreamRelations() { + for (const auto &start : need_first_active_streams_) { + vector group{start}; + DFS(start, &group); + } +} + +void AscendStreamAssign::FindStreamRelations(const NotNull &graph_ptr) { + AscendResourceMng &resource_manager = AscendResourceMng::GetInstance(); + auto stream_num = resource_manager.get_cur_stream_num(); + if (stream_num <= 1) { + return; + } + + auto exe_orders = graph_ptr->execution_order(); + for (size_t i = 0; i < exe_orders.size(); i++) { + auto cur_cnode = exe_orders[i]; + auto name = AnfAlgo::GetCNodeName(cur_cnode); + if (name != kStreamSwitchOpName && name != kStreamActiveOpName) { + continue; + } + + // support:streamswitch is begin of the stream + if (name == kStreamSwitchOpName) { + GetStreamSwitchStreamRelation(cur_cnode); + } + + if (name == kStreamActiveOpName) { + GetStreamActiveStreamRelation(graph_ptr, i); + } + } +} + +void AscendStreamAssign::GetStreamSwitchStreamRelation(const CNodePtr &node_ptr) { + MS_EXCEPTION_IF_NULL(node_ptr); + auto cur_stream_id = 
AnfAlgo::GetStreamId(node_ptr); + auto true_stream_id = AnfAlgo::GetNodeAttr(node_ptr, kAttrTrueBranchStream); + if (true_stream_id <= cur_stream_id) { + MS_LOG(ERROR) << "StreamSwitch self stream id " << cur_stream_id + << " is greater than true branch stream id:" << true_stream_id; + } + auto it = stream_relations_.find(cur_stream_id); + if (it == stream_relations_.end()) { + stream_relations_[cur_stream_id] = {true_stream_id}; + } else { + auto iter = + std::find(stream_relations_[cur_stream_id].begin(), stream_relations_[cur_stream_id].end(), true_stream_id); + if (iter == stream_relations_[cur_stream_id].end()) { + stream_relations_[cur_stream_id].emplace_back(true_stream_id); + } + } +} + +void AscendStreamAssign::GetStreamActiveStreamRelation(const NotNull &graph_ptr, size_t index) { + StreamActiveKind kind = GetStreamActiveKind(graph_ptr, index); + if (kind == kInvalid) { + MS_LOG(INFO) << "Invalid streamActive kind"; + return; + } + + auto orders = graph_ptr->execution_order(); + auto cur_cnode = orders[index]; + auto cur_stream_id = AnfAlgo::GetStreamId(cur_cnode); + auto active_list = AnfAlgo::GetNodeAttr>(cur_cnode, kAttrActiveStreamList); + if (kind == kHead) { + uint32_t active_current_node = GetStreamByActivedStream(cur_stream_id); + if (active_current_node == kInvalidStreamId) { + MS_LOG(EXCEPTION) << "No stream to active streamactive stream"; + } + + for (const auto &item : active_list) { + if (item <= active_current_node) { + MS_LOG(WARNING) << "Actived stream is less than activing stream"; + continue; + } + auto it = + std::find(stream_relations_[active_current_node].begin(), stream_relations_[active_current_node].end(), item); + if (it == stream_relations_[active_current_node].end()) { + stream_relations_[active_current_node].emplace_back(item); + } + } + } + + if (kind == kMiddle) { + for (const auto &stream : active_list) { + if (stream <= cur_stream_id) { + MS_LOG(INFO) << "MIDDLE StreamActive active stream is less than self stream, no need 
deal"; + } else { + MS_LOG(ERROR) << "MIDDLE StreamActive active stream is greater than self stream, should not be exit now"; + } + } + } + + if (kind == kTail) { + auto it = stream_relations_.find(cur_stream_id); + if (it == stream_relations_.end()) { + stream_relations_[cur_stream_id] = active_list; + } else { + for (const auto &stream : active_list) { + if (stream <= cur_stream_id) { + MS_LOG(WARNING) << "Actived stream is less than activing stream"; + continue; + } + auto iter = std::find(stream_relations_[cur_stream_id].begin(), stream_relations_[cur_stream_id].end(), stream); + if (iter == stream_relations_[cur_stream_id].end()) { + stream_relations_[cur_stream_id].emplace_back(stream); + } + } + } + } +} + +StreamActiveKind AscendStreamAssign::GetStreamActiveKind(const NotNull &graph_ptr, size_t index) { + auto exe_orders = graph_ptr->execution_order(); + if (index >= exe_orders.size()) { + MS_LOG(EXCEPTION) << "Invalid op index:" << index; + } + + auto cur_cnode = exe_orders[index]; + auto cur_stream_id = AnfAlgo::GetStreamId(cur_cnode); + if (AnfAlgo::GetCNodeName(cur_cnode) != kStreamActiveOpName) { + MS_LOG(EXCEPTION) << "Current node name is not StreamActive"; + } + + if (index == 0) { + return kInvalid; + } + + if (index == exe_orders.size() - 1) { + return kInvalid; + } + + uint32_t pre_stream_id = UINT32_MAX; + uint32_t next_stream_id = UINT32_MAX; + int32_t start = SizeToInt(index) - 1; + for (int32_t i = start; i >= 0; i--) { + auto cnode = exe_orders[IntToSize(i)]; + auto name = AnfAlgo::GetCNodeName(cnode); + if (name == kSendOpName || name == kRecvOpName) { + continue; + } + + pre_stream_id = AnfAlgo::GetStreamId(cnode); + break; + } + + for (size_t i = index + 1; i < exe_orders.size(); i++) { + auto cnode = exe_orders[i]; + auto name = AnfAlgo::GetCNodeName(cnode); + if (name == kSendOpName || name == kRecvOpName) { + continue; + } + + next_stream_id = AnfAlgo::GetStreamId(cnode); + break; + } + + // pre_stream_id = UINT32_MAX:means no node 
active current StreamActive + // next_stream_id = UINT32_MAX:means current StreamActive active no node + if (pre_stream_id == UINT32_MAX || next_stream_id == UINT32_MAX) { + return kInvalid; + } + + if (cur_stream_id == pre_stream_id && cur_stream_id == next_stream_id) { + return kMiddle; + } + + if (cur_stream_id == pre_stream_id) { + return kTail; + } + + if (cur_stream_id == next_stream_id) { + return kHead; + } + + return kInvalid; +} + +uint32_t AscendStreamAssign::GetStreamByActivedStream(uint32_t actived_stream_id) { + if (stream_relations_.empty()) { + return kInvalidStreamId; + } + + for (const auto &item : stream_relations_) { + auto it = std::find(item.second.begin(), item.second.end(), actived_stream_id); + if (it != item.second.end()) { + return item.first; + } + } + + return kInvalidStreamId; +} + +void AscendStreamAssign::PrintStreamRelations() { + MS_LOG(INFO) << "Stream relations size:" << stream_relations_.size(); + for (const auto &item : stream_relations_) { + MS_LOG(INFO) << "Stream:" << item.first; + for (const auto &stream : item.second) { + MS_LOG(INFO) << "--actived stream id:" << stream; + } + } +} + +void AscendStreamAssign::PrintStreamGroups() { + MS_LOG(INFO) << "Stream group size:" << stream_groups_.size(); + for (const auto &item : stream_groups_) { + MS_LOG(INFO) << "Group:"; + for (const auto &stream : item) { + MS_LOG(INFO) << "Stream id:" << stream; + } + } +} + +// section 11 +bool AscendStreamAssign::IsSatisfiedEvent(uint32_t send_stream_id, uint32_t recv_stream_id) const { + size_t send_group = 0; + size_t recv_group = 0; + bool send_flag = true; + bool recv_flag = true; + for (size_t i = 0; i < stream_groups_.size(); i++) { + auto group = stream_groups_[i]; + if (send_flag) { + auto it = std::find(group.begin(), group.end(), send_stream_id); + if (it != group.end()) { + send_group = i; + send_flag = false; + } + } + + if (recv_flag) { + auto it = std::find(group.begin(), group.end(), recv_stream_id); + if (it != group.end()) { 
+ recv_group = i; + recv_flag = false; + } + } + } + + if (!(send_flag || recv_flag)) { + return (send_group != recv_group); + } + + return false; +} + +void AscendStreamAssign::FindEventRelations(const NotNull &graph_ptr) { + AscendResourceMng &resource_manager = AscendResourceMng::GetInstance(); + auto event_nums = resource_manager.get_cur_event_num(); + if (event_nums == 0) { + return; + } + auto exe_orders = graph_ptr->execution_order(); + // find all event info + for (size_t i = 0; i < exe_orders.size(); i++) { + auto cur_cnode = exe_orders[i]; + auto name = AnfAlgo::GetCNodeName(cur_cnode); + if (name == kSendOpName) { + event_map_[cur_cnode] = {}; + } + + if (name == kRecvOpName) { + auto recv_event_id = AnfAlgo::GetNodeAttr(cur_cnode, kAttrEventId); + for (auto &item : event_map_) { + auto send_event_id = AnfAlgo::GetNodeAttr(item.first, kAttrEventId); + if (recv_event_id == send_event_id) { + item.second = cur_cnode; + break; + } + } + } + } + + // delete useless event info + auto begin = event_map_.begin(); + while (begin != event_map_.end()) { + auto send_stream_id = AnfAlgo::GetStreamId(begin->first); + auto recv_stream_id = AnfAlgo::GetStreamId(begin->second); + bool flag = IsSatisfiedEvent(send_stream_id, recv_stream_id); + if (!flag) { + begin = event_map_.erase(begin); + } else { + begin++; + } + } + + MS_LOG(INFO) << "Satisfied event info"; + for (const auto &item : event_map_) { + MS_LOG(INFO) << "Event_id:" << AnfAlgo::GetNodeAttr(item.first, kAttrEventId); + } +} + } // namespace ascend } // namespace device } // namespace mindspore diff --git a/mindspore/ccsrc/device/ascend/ascend_stream_assign.h b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.h similarity index 83% rename from mindspore/ccsrc/device/ascend/ascend_stream_assign.h rename to mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.h index 625ab6ad6e1..00fca60e8d1 100644 --- a/mindspore/ccsrc/device/ascend/ascend_stream_assign.h +++ 
b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.h @@ -28,7 +28,7 @@ #include "runtime/base.h" #include "runtime/rt_model.h" #include "runtime/stream.h" -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" #include "utils/contract.h" namespace mindspore { @@ -94,6 +94,7 @@ class AscendResourceMng { uint32_t cur_event_num_{0}; }; +enum StreamActiveKind { kInvalid = 0, kHead, kMiddle, kTail }; class AscendStreamAssign { public: static AscendStreamAssign &GetInstance() { @@ -109,6 +110,8 @@ class AscendStreamAssign { void GetWaitStreams(vector *wait_active_stream_list); CNodePtr CreateSendApplyKernel(const NotNull &graph_ptr, uint32_t event_id, uint32_t stream_id); CNodePtr CreateRecvApplyKernel(const NotNull &graph_ptr, uint32_t event_id, uint32_t stream_id); + const std::vector> &get_stream_group() const { return stream_groups_; } + const std::map &get_event_map() const { return event_map_; } private: AscendStreamAssign() = default; @@ -147,6 +150,20 @@ class AscendStreamAssign { const CNodePtr &node); void GetParallelStream(uint32_t cur_stream_id, uint32_t stream_acitve_id, std::vector *parallel_streams); + // function for memory resue + void GetStreamRelations(); + void DFS(uint32_t start, std::vector *group); + bool IsVecExist(std::vector *group); + void FindStreamRelations(const NotNull &graph_ptr); + void GetStreamSwitchStreamRelation(const CNodePtr &node_ptr); + void GetStreamActiveStreamRelation(const NotNull &graph_ptr, size_t index); + StreamActiveKind GetStreamActiveKind(const NotNull &graph_ptr, size_t index); + uint32_t GetStreamByActivedStream(uint32_t actived_stream_id); + void PrintStreamRelations(); + void PrintStreamGroups(); + void FindEventRelations(const NotNull &graph_ptr); + bool IsSatisfiedEvent(uint32_t send_stream_id, uint32_t recv_stream_id) const; + bool independent_stream_activated_{false}; bool hcom_stream_activated_{false}; std::map independent_stream_map_{}; @@ -154,6 +171,11 @@ class 
AscendStreamAssign { std::map common_stream_map_{}; std::set processed_streams_{}; std::vector need_first_active_streams_{}; + + // attr for memory copy reuse + std::map> stream_relations_{}; + std::vector> stream_groups_{}; + std::map event_map_; // new policy end }; } // namespace ascend diff --git a/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc b/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc new file mode 100644 index 00000000000..ab2c6b27486 --- /dev/null +++ b/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc @@ -0,0 +1,282 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifdef ENABLE_DATA_DUMP +#include "runtime/device/ascend/dump/data_dumper.h" + +#include +#include +#include +#include "utility" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/mem.h" +#include "runtime/kernel.h" +#include "runtime/device/ascend/dump/ge_dump.h" +#include "proto/op_mapping_info.pb.h" +#include "utils/context/ms_context.h" +#include "debug/data_dump_parser.h" + +constexpr uint32_t kAicpuLoadFlag = 1; +constexpr uint32_t kAicpuUnloadFlag = 0; +constexpr uint32_t kTupleTaskId = 0; +constexpr uint32_t kTupleStreamId = 1; +constexpr uint32_t kTupleArgs = 2; +constexpr uint32_t kCurrentStepTensorIndex = 0; +constexpr uint32_t kCurrentEpochTensorIndex = 1; +constexpr uint32_t kStepsPerEpochTensorIndex = 2; + +namespace mindspore { +namespace device { +namespace ascend { +void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull task); +void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull task); +void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr); + +DataDumper::~DataDumper() { + ReleaseDevMem(&dev_load_mem_); + ReleaseDevMem(&dev_unload_mem_); +} + +void DataDumper::LoadDumpInfo() { + MS_LOG(INFO) << "[DataDump] LoadDumpInfo start"; + MS_EXCEPTION_IF_NULL(kernel_graph_); + aicpu::dump::OpMappingInfo dump_info; + SetOpMappingInfo(NOT_NULL(&dump_info)); + + auto kernels = kernel_graph_->execution_order(); + for (const auto &kernel : kernels) { + MS_EXCEPTION_IF_NULL(kernel); + if (!KernelNeedDump(kernel)) { + continue; + } + MS_LOG(INFO) << "[DataDump] LoadDumpInfo kernel:" << kernel->fullname_with_scope(); + dump_kernel_names_.emplace_back(kernel->fullname_with_scope()); + + aicpu::dump::Task task; + ConstructDumpTask(NOT_NULL(kernel), NOT_NULL(&task)); + MS_EXCEPTION_IF_NULL(dump_info.mutable_task()); + dump_info.mutable_task()->Add(std::move(task)); + } + RtLoadDumpData(dump_info, &dev_load_mem_); + load_flag_ = true; + MS_LOG(INFO) << "[DataDump] LoadDumpInfo end"; +} + +void 
DataDumper::SetOpMappingInfo(NotNull dump_info) const { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + MS_EXCEPTION_IF_NULL(kernel_graph_); + auto dump_path = DataDumpParser::GetInstance().GetDumpPath(); + if (!dump_path.has_value()) { + MS_LOG(EXCEPTION) << "Dump path invalid"; + } + auto device_id = context_ptr->device_id(); + dump_info->set_dump_path(dump_path.value() + "_" + std::to_string(device_id) + "/"); + MS_LOG(INFO) << "[DataDump] dump_path:" << dump_path.value(); + + dump_info->set_model_name(DataDumpParser::GetInstance().net_name() + "_" + std::to_string(kernel_graph_->graph_id())); + dump_info->set_dump_step(std::to_string(DataDumpParser::GetInstance().dump_step())); + dump_info->set_model_id(kernel_graph_->graph_id()); + dump_info->set_flag(kAicpuLoadFlag); + + const auto &input_ctrl_tensors = kernel_graph_->input_ctrl_tensors(); + if (input_ctrl_tensors == nullptr || input_ctrl_tensors->size() < 3) { + MS_LOG(INFO) << "[DataDump] Not data sink mode, input_ctrl_tensor"; + return; + } + const auto ¤t_step_tensor = input_ctrl_tensors->at(kCurrentStepTensorIndex); + const auto &currnet_epoch_tensor = input_ctrl_tensors->at(kCurrentEpochTensorIndex); + const auto &steps_per_epoch_tensor = input_ctrl_tensors->at(kStepsPerEpochTensorIndex); + + MS_EXCEPTION_IF_NULL(current_step_tensor); + MS_EXCEPTION_IF_NULL(currnet_epoch_tensor); + MS_EXCEPTION_IF_NULL(steps_per_epoch_tensor); + MS_EXCEPTION_IF_NULL(current_step_tensor->device_address()); + MS_EXCEPTION_IF_NULL(currnet_epoch_tensor->device_address()); + MS_EXCEPTION_IF_NULL(steps_per_epoch_tensor->device_address()); + + void *current_step = current_step_tensor->device_address()->ptr_; + void *current_epoch = currnet_epoch_tensor->device_address()->ptr_; + void *steps_per_epoch = steps_per_epoch_tensor->device_address()->ptr_; + + if (current_epoch != nullptr && current_step != nullptr && steps_per_epoch != nullptr) { + 
dump_info->set_step_id_addr(reinterpret_cast(current_epoch)); + dump_info->set_loop_cond_addr(reinterpret_cast(current_step)); + dump_info->set_iterations_per_loop_addr(reinterpret_cast(steps_per_epoch)); + } else { + MS_LOG(INFO) << "Invalid ctrl tensor device address"; + } +} + +bool DataDumper::KernelNeedDump(const CNodePtr &kernel) const { + if (AnfAlgo::GetKernelType(kernel) != TBE_KERNEL && AnfAlgo::GetKernelType(kernel) != AICPU_KERNEL && + AnfAlgo::GetKernelType(kernel) != AKG_KERNEL) { + return false; + } + MS_EXCEPTION_IF_NULL(kernel); + // dump all kernel if mode is set 0 in data_dump.json + return DataDumpParser::GetInstance().NeedDump(kernel->fullname_with_scope()); +} + +void DataDumper::UnloadDumpInfo() { + if (!load_flag_) { + MS_LOG(WARNING) << "Load not success, no need to unload"; + return; + } + MS_EXCEPTION_IF_NULL(kernel_graph_); + MS_LOG(INFO) << "[DataDump] UnloadDumpInfo start. graphId:" << kernel_graph_->graph_id(); + + aicpu::dump::OpMappingInfo op_mapping_info; + op_mapping_info.set_model_id(kernel_graph_->graph_id()); + op_mapping_info.set_flag(kAicpuUnloadFlag); + + for (const auto &kernel_name : dump_kernel_names_) { + aicpu::dump::Task task; + auto iter = runtime_info_map_.find(kernel_name); + if (iter == runtime_info_map_.end()) { + MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map"; + } + MS_EXCEPTION_IF_NULL(iter->second); + auto task_id = std::get(*iter->second); + task.set_task_id(task_id); + MS_EXCEPTION_IF_NULL(op_mapping_info.mutable_task()); + op_mapping_info.mutable_task()->Add(std::move(task)); + } + + RtLoadDumpData(op_mapping_info, &dev_unload_mem_); +} + +void DataDumper::ReleaseDevMem(void **ptr) const { + if (ptr == nullptr) { + return; + } + if (*ptr != nullptr) { + rtError_t rt_error = rtFree(*ptr); + if (rt_error != RT_ERROR_NONE) { + MS_LOG(ERROR) << "[DataDump] Call rtFree failed, ret:" << rt_error; + } + *ptr = nullptr; + } +} + +void DataDumper::ConstructDumpTask(NotNull kernel, NotNull 
dump_task) const { + dump_task->set_end_graph(false); + auto iter = runtime_info_map_.find(kernel->fullname_with_scope()); + if (iter == runtime_info_map_.end()) { + MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map"; + } + MS_EXCEPTION_IF_NULL(iter->second); + auto task_id = std::get(*iter->second); + auto stream_id = std::get(*iter->second); + auto args = std::get(*iter->second); + MS_LOG(INFO) << "[DataDump] Get runtime info task_id:" << task_id << " stream_id:" << stream_id; + + dump_task->set_task_id(task_id); + dump_task->set_stream_id(stream_id); + MS_EXCEPTION_IF_NULL(dump_task->mutable_op()); + dump_task->mutable_op()->set_op_name(kernel->fullname_with_scope()); + dump_task->mutable_op()->set_op_type(AnfAlgo::GetCNodeName(kernel.get())); + + DumpKernelOutput(kernel, args, dump_task); + DumpKernelInput(kernel, args, dump_task); +} + +void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr) { + std::string proto_str; + size_t proto_size = dump_info.ByteSizeLong(); + bool ret = dump_info.SerializeToString(&proto_str); + if (!ret || proto_size == 0) { + MS_LOG(EXCEPTION) << "[DataDump] Protobuf SerializeToString failed, proto size %zu."; + } + + rtError_t rt_ret = rtMalloc(ptr, proto_size, RT_MEMORY_HBM); + if (rt_ret != RT_ERROR_NONE) { + MS_LOG(EXCEPTION) << "[DataDump] Call rtMalloc failed"; + } + + if (ptr == nullptr) { + MS_LOG(ERROR) << "[DataDump] rtMalloc failed, ptr is nullptr"; + return; + } + rt_ret = rtMemcpy(*ptr, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); + if (rt_ret != RT_ERROR_NONE) { + MS_LOG(EXCEPTION) << "[DataDump] Call rtMemcpy failed"; + } + + MS_LOG(INFO) << "[DataDump] rtDatadumpInfoLoad start"; + rt_ret = rtDatadumpInfoLoad(*ptr, proto_size); + if (rt_ret != RT_ERROR_NONE) { + MS_LOG(EXCEPTION) << "[DataDump] Call rtDatadumpInfoLoad failed"; + } +} + +void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull task) { + MS_LOG(INFO) << "[DataDump] 
DumpKernelOutput start. Kernel:" << kernel->fullname_with_scope(); + auto input_size = AnfAlgo::GetInputTensorNum(kernel); + auto output_size = AnfAlgo::GetOutputTensorNum(kernel); + uint64_t offset = sizeof(void *) * input_size; + for (size_t i = 0; i < output_size; ++i) { + auto data_type = AnfAlgo::GetOutputDeviceDataType(kernel, i); + auto output_format = AnfAlgo::GetOutputFormat(kernel, i); + auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel, i); + + aicpu::dump::Output output; + output.set_data_type(GetGeDataType(data_type)); + output.set_format(GetGeFormat(output_format, output_shape.size())); + MS_EXCEPTION_IF_NULL(output.mutable_shape()); + for (auto dim : output_shape) { + output.mutable_shape()->add_dim(dim); + } + output.set_original_output_format(GetGeFormat(output_format, output_shape.size())); + output.set_address(static_cast(reinterpret_cast(args)) + offset); + MS_EXCEPTION_IF_NULL(task->mutable_output()); + task->mutable_output()->Add(std::move(output)); + offset += sizeof(void *); + } +} + +void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull task) { + MS_LOG(INFO) << "[DataDump] DumpKernelInput start. 
Kernel:" << kernel->fullname_with_scope(); + auto input_size = AnfAlgo::GetInputTensorNum(kernel); + uint64_t offset = 0; + for (size_t i = 0; i < input_size; ++i) { + aicpu::dump::Input input; + auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i); + auto input_node = input_node_with_index.first; + auto input_index = input_node_with_index.second; + std::string output_format = AnfAlgo::GetOutputFormat(input_node, input_index); + auto output_type = AnfAlgo::GetOutputDeviceDataType(input_node, input_index); + if (output_type == kTypeUnknown) { + MS_LOG(WARNING) << "[DataDump] It is not suggested to use a lonely weight parameter as the output of graph"; + output_type = AnfAlgo::GetOutputInferDataType(input_node, input_index); + } + auto output_shape = AnfAlgo::GetOutputDeviceShape(input_node, input_index); + + input.set_data_type(GetGeDataType(output_type)); + input.set_format(GetGeFormat(output_format, output_shape.size())); + MS_EXCEPTION_IF_NULL(input.mutable_shape()); + for (auto dim : output_shape) { + input.mutable_shape()->add_dim(dim); + } + input.set_address(static_cast(reinterpret_cast(args)) + offset); + MS_EXCEPTION_IF_NULL(task->mutable_input()); + task->mutable_input()->Add(std::move(input)); + offset += sizeof(void *); + } +} +} // namespace ascend +} // namespace device +} // namespace mindspore +#endif diff --git a/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.h b/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.h new file mode 100644 index 00000000000..d99eb4db686 --- /dev/null +++ b/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.h @@ -0,0 +1,69 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_ +#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_ +#ifdef ENABLE_DATA_DUMP +#include +#include +#include +#include +#include +#include "backend/session/kernel_graph.h" + +namespace aicpu { +namespace dump { +class OpMappingInfo; +class Task; +} // namespace dump +} // namespace aicpu +namespace mindspore { +namespace device { +namespace ascend { +// tuple(op_name, task_id, stream_id, args) +using RuntimeInfo = std::tuple; +class DataDumper { + public: + DataDumper(const session::KernelGraph *kernel_graph, + const std::map> &runtime_info_map) + : load_flag_(false), + dev_load_mem_(nullptr), + dev_unload_mem_(nullptr), + kernel_graph_(kernel_graph), + runtime_info_map_(runtime_info_map) {} + ~DataDumper(); + void LoadDumpInfo(); + + void UnloadDumpInfo(); + + private: + void ReleaseDevMem(void **ptr) const; + bool KernelNeedDump(const CNodePtr &kernel) const; + void SetOpMappingInfo(NotNull dump_info) const; + void ConstructDumpTask(NotNull kernel, NotNull dump_task) const; + + bool load_flag_; + void *dev_load_mem_; + void *dev_unload_mem_; + std::vector dump_kernel_names_; + const session::KernelGraph *kernel_graph_; + std::map> runtime_info_map_; +}; +} // namespace ascend +} // namespace device +} // namespace mindspore +#endif +#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_ diff --git a/mindspore/ccsrc/runtime/device/ascend/dump/ge_dump.h b/mindspore/ccsrc/runtime/device/ascend/dump/ge_dump.h new file mode 100644 index 
00000000000..eae70c4b0b7 --- /dev/null +++ b/mindspore/ccsrc/runtime/device/ascend/dump/ge_dump.h @@ -0,0 +1,120 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_ +#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_ + +#include +#include +#include "proto/ge_dtype.pb.h" +#include "ir/dtype/type_id.h" +#include "utils/utils.h" + +namespace mindspore { +namespace device { +namespace ascend { +static ge::proto::DataType GetGeDataType(TypeId type_id) { + static const std::map data_type_map = { + {TypeId::kTypeUnknown, ge::proto::DT_UNDEFINED}, {TypeId::kNumberTypeFloat32, ge::proto::DT_FLOAT}, + {TypeId::kNumberTypeFloat16, ge::proto::DT_FLOAT16}, {TypeId::kNumberTypeInt8, ge::proto::DT_INT8}, + {TypeId::kNumberTypeUInt8, ge::proto::DT_UINT8}, {TypeId::kNumberTypeInt16, ge::proto::DT_INT16}, + {TypeId::kNumberTypeUInt16, ge::proto::DT_UINT16}, {TypeId::kNumberTypeInt32, ge::proto::DT_INT32}, + {TypeId::kNumberTypeInt64, ge::proto::DT_INT64}, {TypeId::kNumberTypeUInt32, ge::proto::DT_UINT32}, + {TypeId::kNumberTypeUInt64, ge::proto::DT_UINT64}, {TypeId::kNumberTypeBool, ge::proto::DT_BOOL}, + {TypeId::kNumberTypeFloat64, ge::proto::DT_DOUBLE}, + }; + MS_LOG(INFO) << "Vm origin type_id:" << type_id; + auto iter = data_type_map.find(type_id); + if (iter == data_type_map.end()) { + MS_LOG(EXCEPTION) << "Invalid data type:" << 
type_id; + } + return iter->second; +} + +enum GeFormat { + kFormat_NCHW = 0, // NCHW + kFormat_NHWC, // NHWC + kFormat_ND, // Nd Tensor + kFormat_NC1HWC0, // NC1HWC0 + kFormat_FRACTAL_Z, // FRACTAL_Z + kFormat_NC1C0HWPAD, + kFormat_NHWC1C0, + kFormat_FSR_NCHW, + kFormat_FRACTAL_DECONV, + kFormat_C1HWNC0, + kFormat_FRACTAL_DECONV_TRANSPOSE, + kFormat_FRACTAL_DECONV_SP_STRIDE_TRANS, + kFormat_NC1HWC0_C04, // NC1HWC0, C0 =4 + kFormat_FRACTAL_Z_C04, // FRACZ, C0 =4 + kFormat_CHWN, + kFormat_FRACTAL_DECONV_SP_STRIDE8_TRANS, + kFormat_HWCN, + kFormat_NC1KHKWHWC0, // KH,KW kernel h& kernel w maxpooling max output format + kFormat_BN_WEIGHT, + kFormat_FILTER_HWCK, // filter input tensor format + kFormat_HASHTABLE_LOOKUP_LOOKUPS = 20, + kFormat_HASHTABLE_LOOKUP_KEYS, + kFormat_HASHTABLE_LOOKUP_VALUE, + kFormat_HASHTABLE_LOOKUP_OUTPUT, + kFormat_HASHTABLE_LOOKUP_HITS = 24, + kFormat_C1HWNCoC0, + kFormat_MD, + kFormat_NDHWC, + kFormat_FRACTAL_ZZ, + kFormat_FRACTAL_NZ, + kFormat_NCDHW, + kFormat_DHWCN, // 3D filter input tensor format + kFormat_NDC1HWC0, + kFormat_FRACTAL_Z_3D, + kFormat_CN, + kFormat_NC, + kFormat_DHWNC, + kFormat_FRACTAL_Z_3D_TRANSPOSE, // 3D filter(transpose) input tensor format + kFormat_RESERVED, + kFormat_ALL +}; + +static GeFormat GetGeFormat(const std::string &format, size_t shape_size) { + static const std::map format_map = { + // default format: nchw, fractal_nz? 
+ {kOpFormat_DEFAULT, kFormat_NCHW}, + {kOpFormat_NC1KHKWHWC0, kFormat_NC1KHKWHWC0}, + {kOpFormat_ND, kFormat_ND}, + {kOpFormat_NCHW, kFormat_NCHW}, + {kOpFormat_NHWC, kFormat_NHWC}, + {kOpFormat_HWCN, kFormat_HWCN}, + {kOpFormat_NC1HWC0, kFormat_NC1HWC0}, + {kOpFormat_FRAC_Z, kFormat_FRACTAL_Z}, + {kOpFormat_FRAC_NZ, kFormat_FRACTAL_NZ}, + {kOpFormat_C1HWNCoC0, kFormat_C1HWNCoC0}, + {kOpFormat_NC1HWC0_C04, kFormat_NC1HWC0_C04}, + {kOpFormat_FRACTAL_Z_C04, kFormat_FRACTAL_Z_C04}, + {kOpFormat_NDHWC, kFormat_NDHWC}, + }; + MS_LOG(INFO) << "GetGeFormat format:" << format << " shape_size:" << shape_size; + if (format == kOpFormat_DEFAULT) { + return shape_size == 4 ? kFormat_NCHW : kFormat_ND; + } + auto iter = format_map.find(format); + if (iter == format_map.end()) { + MS_LOG(EXCEPTION) << "Invalid format:" << format; + } + return iter->second; +} +} // namespace ascend +} // namespace device +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_ diff --git a/mindspore/ccsrc/runtime/device/ascend/dump/proto/ge_dtype.proto b/mindspore/ccsrc/runtime/device/ascend/dump/proto/ge_dtype.proto new file mode 100644 index 00000000000..7c690524d9d --- /dev/null +++ b/mindspore/ccsrc/runtime/device/ascend/dump/proto/ge_dtype.proto @@ -0,0 +1,49 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +syntax = "proto3"; + +package ge.proto; + +enum DataType +{ + DT_UNDEFINED = 0; // Used to indicate a DataType field has not been set. + DT_FLOAT = 1; // float type + DT_FLOAT16 = 2; // fp16 type + DT_INT8 = 3; // int8 type + DT_UINT8 = 4; // uint8 type + DT_INT16 = 5; // int16 type + DT_UINT16 = 6; // uint16 type + DT_INT32 = 7; // + DT_INT64 = 8; // int64 type + DT_UINT32 = 9; // unsigned int32 + DT_UINT64 = 10; // unsigned int64 + DT_BOOL = 11; // bool type + DT_DOUBLE = 12; // double type + DT_STRING = 13; // string type + DT_DUAL_SUB_INT8 = 14; /**< dual output int8 type */ + DT_DUAL_SUB_UINT8 = 15; /**< dual output uint8 type */ + DT_COMPLEX64 = 16; // complex64 type + DT_COMPLEX128 = 17; // complex128 type + DT_QINT8 = 18; // qint8 type + DT_QINT16 = 19; // qint16 type + DT_QINT32 = 20; // qint32 type + DT_QUINT8 = 21; // quint8 type + DT_QUINT16 = 22; // quint16 type + DT_RESOURCE = 23; // resource type + DT_STRING_REF = 24; // string_ref type + DT_DUAL = 25; /**< dual output type */ +} \ No newline at end of file diff --git a/mindspore/ccsrc/runtime/device/ascend/dump/proto/op_mapping_info.proto b/mindspore/ccsrc/runtime/device/ascend/dump/proto/op_mapping_info.proto new file mode 100644 index 00000000000..d3377c655d7 --- /dev/null +++ b/mindspore/ccsrc/runtime/device/ascend/dump/proto/op_mapping_info.proto @@ -0,0 +1,78 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +syntax = "proto3"; +package aicpu.dump; + +message Shape { + repeated uint64 dim = 1; +} + +message Output { + int32 data_type = 1; + int32 format = 2; + Shape shape = 3; + uint64 address = 4; + string original_name = 5; + int32 original_output_index = 6; + int32 original_output_data_type = 7; + int32 original_output_format = 8; + uint64 size = 9; +}; + +message Input { + int32 data_type = 1; + int32 format = 2; + Shape shape = 3; + uint64 address = 4; + uint64 size = 5; +} + +message Op { + string op_name = 1; + string op_type = 2; +}; + +message Task { + uint32 task_id = 1; + uint32 stream_id = 2; + Op op = 3; + repeated Output output = 4; + bool end_graph = 5; + repeated Input input = 6; +}; + +message OpMappingInfo { + string dump_path = 1; + oneof model_name_param { + string model_name = 2; + } + oneof model_id_param { + uint32 model_id = 3; + } + oneof step_id { + uint64 step_id_addr = 4; + } + oneof iterations_per_loop { + uint64 iterations_per_loop_addr = 5; + } + oneof loop_cond { + uint64 loop_cond_addr = 6; + } + uint32 flag = 7; // 0x01 load, 0x00 unload + repeated Task task = 8; + string dump_step = 9; +}; diff --git a/mindspore/ccsrc/device/ascend/kernel_build_ascend.cc b/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc similarity index 93% rename from mindspore/ccsrc/device/ascend/kernel_build_ascend.cc rename to mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc index bd0b4363443..39cefcb0201 100644 --- a/mindspore/ccsrc/device/ascend/kernel_build_ascend.cc +++ b/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc @@ -14,26 +14,26 @@ * limitations under the License. 
*/ -#include "device/ascend/kernel_build_ascend.h" +#include "runtime/device/ascend/kernel_build_ascend.h" #include #include #include #include -#include "device/ascend/kernel_select_ascend.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "kernel/tbe/tbe_kernel_build.h" -#include "kernel/tbe/tbe_kernel_parallel_build.h" -#include "kernel/akg/ascend/akg_ascend_kernel_build.h" -#include "kernel/aicpu/aicpu_kernel_build.h" -#include "kernel/hccl/hccl_kernel_build.h" -#include "kernel/rts/rt_kernel_build.h" -#include "kernel/tbe/tbe_utils.h" -#include "kernel/common_utils.h" -#include "operator/ops.h" -#include "session/anf_runtime_algorithm.h" +#include "runtime/device/ascend/kernel_select_ascend.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_build.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h" +#include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.h" +#include "backend/kernel_compiler/aicpu/aicpu_kernel_build.h" +#include "backend/kernel_compiler/hccl/hccl_kernel_build.h" +#include "backend/kernel_compiler/rts/rt_kernel_build.h" +#include "backend/kernel_compiler/tbe/tbe_utils.h" +#include "backend/kernel_compiler/common_utils.h" +#include "frontend/operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" #include "./common.h" namespace mindspore { diff --git a/mindspore/ccsrc/device/ascend/kernel_build_ascend.h b/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.h similarity index 97% rename from mindspore/ccsrc/device/ascend/kernel_build_ascend.h rename to mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.h index d987b6ce7a8..0d2870eb0aa 100644 --- a/mindspore/ccsrc/device/ascend/kernel_build_ascend.h +++ b/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.h @@ -17,7 +17,7 @@ #ifndef MINDSPORE_CCSRC_DEVICE_ASCEND_KERNEL_BUILD_ASCEND_H_ #define 
MINDSPORE_CCSRC_DEVICE_ASCEND_KERNEL_BUILD_ASCEND_H_ -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc b/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc similarity index 98% rename from mindspore/ccsrc/device/ascend/kernel_select_ascend.cc rename to mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc index cde79a18f76..e8fc6c7a985 100644 --- a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc +++ b/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "device/ascend/kernel_select_ascend.h" +#include "runtime/device/ascend/kernel_select_ascend.h" #include #include @@ -26,15 +26,15 @@ #include #include "common/utils.h" #include "debug/anf_ir_dump.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "ir/func_graph.h" #include "utils/context/ms_context.h" -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_info.h" -#include "kernel/common_utils.h" -#include "kernel/kernel_query.h" -#include "kernel/oplib/oplib.h" -#include "kernel/kernel_build_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/common_utils.h" +#include "backend/kernel_compiler/kernel_query.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/kernel_compiler/kernel_build_info.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/kernel_select_ascend.h b/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.h similarity index 96% rename from mindspore/ccsrc/device/ascend/kernel_select_ascend.h rename to mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.h index 7b7a7b9fb9b..8a93b77cec8 100644 --- a/mindspore/ccsrc/device/ascend/kernel_select_ascend.h +++ 
b/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.h @@ -17,7 +17,7 @@ #ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_KERNEL_SELECT_ASCEND_ANFALGO_H_ #define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_KERNEL_SELECT_ASCEND_ANFALGO_H_ #include "ir/anf.h" -#include "kernel/kernel_build_info.h" +#include "backend/kernel_compiler/kernel_build_info.h" namespace mindspore { namespace device { namespace ascend { diff --git a/mindspore/ccsrc/device/ascend/kernel_select_graph_kernel.cc b/mindspore/ccsrc/runtime/device/ascend/kernel_select_graph_kernel.cc similarity index 98% rename from mindspore/ccsrc/device/ascend/kernel_select_graph_kernel.cc rename to mindspore/ccsrc/runtime/device/ascend/kernel_select_graph_kernel.cc index db31460d318..c76f96728f4 100644 --- a/mindspore/ccsrc/device/ascend/kernel_select_graph_kernel.cc +++ b/mindspore/ccsrc/runtime/device/ascend/kernel_select_graph_kernel.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "device/ascend/kernel_select_ascend.h" -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_info.h" +#include "runtime/device/ascend/kernel_select_ascend.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_info.h" #include "ir/func_graph.h" -#include "kernel/common_utils.h" -#include "kernel/kernel_query.h" -#include "kernel/kernel_build_info.h" +#include "backend/kernel_compiler/common_utils.h" +#include "backend/kernel_compiler/kernel_query.h" +#include "backend/kernel_compiler/kernel_build_info.h" namespace mindspore { namespace device { @@ -362,8 +362,7 @@ void CheckFormatsAndDtypes(const CNodePtr &kernel_node, const std::vectorsecond) { - if (node_user.first->kernel_info() == nullptr || - node_user.first->kernel_info()->select_kernel_build_info() == nullptr) { + if (node_user.first->kernel_info() == nullptr || !node_user.first->kernel_info()->has_build_info()) { // maybe not a real kernel. 
continue; } diff --git a/mindspore/ccsrc/device/ascend/profiling/plugin_impl.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/plugin_impl.cc similarity index 95% rename from mindspore/ccsrc/device/ascend/profiling/plugin_impl.cc rename to mindspore/ccsrc/runtime/device/ascend/profiling/plugin_impl.cc index 7790107aa10..4886c00a8e0 100644 --- a/mindspore/ccsrc/device/ascend/profiling/plugin_impl.cc +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/plugin_impl.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "device/ascend/profiling/plugin_impl.h" +#include "runtime/device/ascend/profiling/plugin_impl.h" #include #include "utils/log_adapter.h" using std::string; diff --git a/mindspore/ccsrc/device/ascend/profiling/plugin_impl.h b/mindspore/ccsrc/runtime/device/ascend/profiling/plugin_impl.h similarity index 100% rename from mindspore/ccsrc/device/ascend/profiling/plugin_impl.h rename to mindspore/ccsrc/runtime/device/ascend/profiling/plugin_impl.h diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_engine_impl.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_engine_impl.cc similarity index 89% rename from mindspore/ccsrc/device/ascend/profiling/profiling_engine_impl.cc rename to mindspore/ccsrc/runtime/device/ascend/profiling/profiling_engine_impl.cc index a3934093348..1f35cba0f74 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_engine_impl.cc +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_engine_impl.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/ascend/profiling/profiling_engine_impl.h" +#include "runtime/device/ascend/profiling/profiling_engine_impl.h" #include "utils/log_adapter.h" -#include "device/ascend/profiling/plugin_impl.h" +#include "runtime/device/ascend/profiling/plugin_impl.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_engine_impl.h b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_engine_impl.h similarity index 100% rename from mindspore/ccsrc/device/ascend/profiling/profiling_engine_impl.h rename to mindspore/ccsrc/runtime/device/ascend/profiling/profiling_engine_impl.h diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc similarity index 97% rename from mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc rename to mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc index a2fe5b852df..6117fe5ecf0 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include "device/ascend/profiling/profiling_manager.h" +#include "runtime/device/ascend/profiling/profiling_manager.h" #include #include #include "securec/include/securec.h" #include "./prof_mgr_core.h" -#include "device/ascend/profiling/plugin_impl.h" -#include "device/ascend/profiling/profiling_engine_impl.h" +#include "runtime/device/ascend/profiling/plugin_impl.h" +#include "runtime/device/ascend/profiling/profiling_engine_impl.h" #include "utils/log_adapter.h" #include "utils/context/ms_context.h" #include "common/utils.h" diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.h b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h similarity index 100% rename from mindspore/ccsrc/device/ascend/profiling/profiling_manager.h rename to mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_utils.cc similarity index 97% rename from mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc rename to mindspore/ccsrc/runtime/device/ascend/profiling/profiling_utils.cc index 17ac4c45302..5b1db6a4049 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_utils.cc @@ -14,16 +14,16 @@ * limitations under the License. 
*/ -#include "device/ascend/profiling/reporter/graph_desc_reporter.h" -#include "device/ascend/profiling/profiling_utils.h" -#include "kernel/kernel.h" -#include "device/ascend/profiling/profiling_manager.h" -#include "session/anf_runtime_algorithm.h" +#include "runtime/device/ascend/profiling/reporter/graph_desc_reporter.h" +#include "runtime/device/ascend/profiling/profiling_utils.h" +#include "backend/kernel_compiler/kernel.h" +#include "runtime/device/ascend/profiling/profiling_manager.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" #include "utils/utils.h" -#include "device/ascend/profiling/reporter/task_desc_reporter.h" +#include "runtime/device/ascend/profiling/reporter/task_desc_reporter.h" #include "utils/context/ms_context.h" -#include "device/ascend/profiling/reporter/point_reporter.h" +#include "runtime/device/ascend/profiling/reporter/point_reporter.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_utils.h similarity index 98% rename from mindspore/ccsrc/device/ascend/profiling/profiling_utils.h rename to mindspore/ccsrc/runtime/device/ascend/profiling/profiling_utils.h index a3c77394478..de8ff2ac399 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_utils.h @@ -22,9 +22,9 @@ #include #include #include -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" #include "utils/contract.h" -#include "device/ascend/profiling/reporter/profiling_desc.h" +#include "runtime/device/ascend/profiling/reporter/profiling_desc.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/desc_reporter.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/desc_reporter.cc similarity index 94% rename from 
mindspore/ccsrc/device/ascend/profiling/reporter/desc_reporter.cc rename to mindspore/ccsrc/runtime/device/ascend/profiling/reporter/desc_reporter.cc index cf80c07ca95..87e2bbcb064 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/desc_reporter.cc +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/desc_reporter.cc @@ -15,8 +15,8 @@ */ #include -#include "device/ascend/profiling/reporter/desc_reporter.h" -#include "device/ascend/profiling/plugin_impl.h" +#include "runtime/device/ascend/profiling/reporter/desc_reporter.h" +#include "runtime/device/ascend/profiling/plugin_impl.h" #include "utils/log_adapter.h" constexpr size_t kReportMaxLen = 2048; diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/desc_reporter.h b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/desc_reporter.h similarity index 93% rename from mindspore/ccsrc/device/ascend/profiling/reporter/desc_reporter.h rename to mindspore/ccsrc/runtime/device/ascend/profiling/reporter/desc_reporter.h index c8e1b3ed62b..f25c64ce058 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/desc_reporter.h +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/desc_reporter.h @@ -22,9 +22,9 @@ #include #include #include "toolchain/prof_reporter.h" -#include "device/ascend/profiling/reporter/profiling_desc.h" +#include "runtime/device/ascend/profiling/reporter/profiling_desc.h" #include "utils/contract.h" -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/graph_desc_reporter.cc similarity index 95% rename from mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.cc rename to mindspore/ccsrc/runtime/device/ascend/profiling/reporter/graph_desc_reporter.cc index 1f2d1570bb6..5c028986d4b 100644 --- 
a/mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.cc +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/graph_desc_reporter.cc @@ -16,8 +16,8 @@ #include #include -#include "device/ascend/profiling/reporter/graph_desc_reporter.h" -#include "session/anf_runtime_algorithm.h" +#include "runtime/device/ascend/profiling/reporter/graph_desc_reporter.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.h b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/graph_desc_reporter.h similarity index 95% rename from mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.h rename to mindspore/ccsrc/runtime/device/ascend/profiling/reporter/graph_desc_reporter.h index 10f78092f29..531f122cde9 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/graph_desc_reporter.h +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/graph_desc_reporter.h @@ -20,7 +20,7 @@ #include #include #include -#include "device/ascend/profiling/reporter/desc_reporter.h" +#include "runtime/device/ascend/profiling/reporter/desc_reporter.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/point_reporter.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/point_reporter.cc similarity index 93% rename from mindspore/ccsrc/device/ascend/profiling/reporter/point_reporter.cc rename to mindspore/ccsrc/runtime/device/ascend/profiling/reporter/point_reporter.cc index 0024ab9c22f..42a1b4c286d 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/point_reporter.cc +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/point_reporter.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "device/ascend/profiling/reporter/point_reporter.h" +#include "runtime/device/ascend/profiling/reporter/point_reporter.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/point_reporter.h b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/point_reporter.h similarity index 95% rename from mindspore/ccsrc/device/ascend/profiling/reporter/point_reporter.h rename to mindspore/ccsrc/runtime/device/ascend/profiling/reporter/point_reporter.h index ae12672df6e..c24535f4ecb 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/point_reporter.h +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/point_reporter.h @@ -19,7 +19,7 @@ #include #include -#include "device/ascend/profiling/reporter/desc_reporter.h" +#include "runtime/device/ascend/profiling/reporter/desc_reporter.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/profiling_desc.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/profiling_desc.cc similarity index 97% rename from mindspore/ccsrc/device/ascend/profiling/reporter/profiling_desc.cc rename to mindspore/ccsrc/runtime/device/ascend/profiling/reporter/profiling_desc.cc index 082cb81e420..4aec72472c7 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/profiling_desc.cc +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/profiling_desc.cc @@ -17,7 +17,7 @@ #include #include #include -#include "device/ascend/profiling/reporter/profiling_desc.h" +#include "runtime/device/ascend/profiling/reporter/profiling_desc.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/profiling_desc.h b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/profiling_desc.h similarity index 100% rename from mindspore/ccsrc/device/ascend/profiling/reporter/profiling_desc.h rename to 
mindspore/ccsrc/runtime/device/ascend/profiling/reporter/profiling_desc.h diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/task_desc_reporter.cc similarity index 92% rename from mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.cc rename to mindspore/ccsrc/runtime/device/ascend/profiling/reporter/task_desc_reporter.cc index 0bd66e31efb..26d722aa1a5 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.cc +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/task_desc_reporter.cc @@ -15,9 +15,9 @@ */ #include -#include "device/ascend/profiling/reporter/task_desc_reporter.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/ascend_kernel_mod.h" +#include "runtime/device/ascend/profiling/reporter/task_desc_reporter.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/ascend_kernel_mod.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.h b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/task_desc_reporter.h similarity index 96% rename from mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.h rename to mindspore/ccsrc/runtime/device/ascend/profiling/reporter/task_desc_reporter.h index 087c691a5fe..51526735a96 100644 --- a/mindspore/ccsrc/device/ascend/profiling/reporter/task_desc_reporter.h +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/task_desc_reporter.h @@ -20,7 +20,7 @@ #include #include #include -#include "device/ascend/profiling/reporter/desc_reporter.h" +#include "runtime/device/ascend/profiling/reporter/desc_reporter.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/ascend/readme.md b/mindspore/ccsrc/runtime/device/ascend/readme.md similarity index 100% rename from mindspore/ccsrc/device/ascend/readme.md rename to 
mindspore/ccsrc/runtime/device/ascend/readme.md diff --git a/mindspore/ccsrc/device/ascend/tasksink/runtime_utils.cc b/mindspore/ccsrc/runtime/device/ascend/tasksink/runtime_utils.cc similarity index 98% rename from mindspore/ccsrc/device/ascend/tasksink/runtime_utils.cc rename to mindspore/ccsrc/runtime/device/ascend/tasksink/runtime_utils.cc index 3faeefb820f..dba71edfd32 100644 --- a/mindspore/ccsrc/device/ascend/tasksink/runtime_utils.cc +++ b/mindspore/ccsrc/runtime/device/ascend/tasksink/runtime_utils.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "device/ascend/tasksink/runtime_utils.h" +#include "runtime/device/ascend/tasksink/runtime_utils.h" #include diff --git a/mindspore/ccsrc/device/ascend/tasksink/runtime_utils.h b/mindspore/ccsrc/runtime/device/ascend/tasksink/runtime_utils.h similarity index 100% rename from mindspore/ccsrc/device/ascend/tasksink/runtime_utils.h rename to mindspore/ccsrc/runtime/device/ascend/tasksink/runtime_utils.h diff --git a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc b/mindspore/ccsrc/runtime/device/ascend/tasksink/task_generator.cc similarity index 96% rename from mindspore/ccsrc/device/ascend/tasksink/task_generator.cc rename to mindspore/ccsrc/runtime/device/ascend/tasksink/task_generator.cc index e026459ae97..5aeb932105f 100644 --- a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc +++ b/mindspore/ccsrc/runtime/device/ascend/tasksink/task_generator.cc @@ -14,14 +14,14 @@ * limitations under the License. 
*/ -#include "device/ascend/tasksink/task_generator.h" +#include "runtime/device/ascend/tasksink/task_generator.h" #include -#include "kernel/task_stream.h" +#include "backend/kernel_compiler/task_stream.h" #include "utils/context/ms_context.h" #include "common/utils.h" -#include "device/ascend/profiling/profiling_utils.h" -#include "device/ascend/profiling/profiling_manager.h" +#include "runtime/device/ascend/profiling/profiling_utils.h" +#include "runtime/device/ascend/profiling/profiling_manager.h" namespace mindspore { namespace device { @@ -127,6 +127,7 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i AddressPtrList kernel_outputs; auto kernel_mod = AnfAlgo::GetKernelMod(anf_node_ptr); MS_EXCEPTION_IF_NULL(kernel_mod); + kernel_mod->set_kernel_name(anf_node_ptr->fullname_with_scope()); if (AnfAlgo::GetCNodeName(anf_node_ptr) != kAtomicAddrCleanOpName) { for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(anf_node_ptr); ++i) { auto real_input_index = AnfAlgo::GetRealInputIndex(anf_node_ptr, i); diff --git a/mindspore/ccsrc/device/ascend/tasksink/task_generator.h b/mindspore/ccsrc/runtime/device/ascend/tasksink/task_generator.h similarity index 95% rename from mindspore/ccsrc/device/ascend/tasksink/task_generator.h rename to mindspore/ccsrc/runtime/device/ascend/tasksink/task_generator.h index ecd5889b04d..134dec48b66 100644 --- a/mindspore/ccsrc/device/ascend/tasksink/task_generator.h +++ b/mindspore/ccsrc/runtime/device/ascend/tasksink/task_generator.h @@ -22,9 +22,9 @@ #include #include #include -#include "device/kernel_runtime.h" +#include "runtime/device/kernel_runtime.h" #include "ir/anf.h" -#include "kernel/ascend_kernel_mod.h" +#include "backend/kernel_compiler/ascend_kernel_mod.h" #include "framework/ge_runtime/task_info.h" namespace mindspore { diff --git a/mindspore/ccsrc/device/convert_tensor_utils.cc b/mindspore/ccsrc/runtime/device/convert_tensor_utils.cc similarity index 97% rename from 
mindspore/ccsrc/device/convert_tensor_utils.cc rename to mindspore/ccsrc/runtime/device/convert_tensor_utils.cc index bac72727c2c..cfd9b0fbdfe 100644 --- a/mindspore/ccsrc/device/convert_tensor_utils.cc +++ b/mindspore/ccsrc/runtime/device/convert_tensor_utils.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "device/convert_tensor_utils.h" +#include "runtime/device/convert_tensor_utils.h" #include namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/convert_tensor_utils.h b/mindspore/ccsrc/runtime/device/convert_tensor_utils.h similarity index 100% rename from mindspore/ccsrc/device/convert_tensor_utils.h rename to mindspore/ccsrc/runtime/device/convert_tensor_utils.h diff --git a/mindspore/ccsrc/device/cpu/cpu_device_address.cc b/mindspore/ccsrc/runtime/device/cpu/cpu_device_address.cc similarity index 95% rename from mindspore/ccsrc/device/cpu/cpu_device_address.cc rename to mindspore/ccsrc/runtime/device/cpu/cpu_device_address.cc index 09ab0da12b6..92269233bdc 100644 --- a/mindspore/ccsrc/device/cpu/cpu_device_address.cc +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_device_address.cc @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/cpu_device_address.h" +#include "runtime/device/cpu/cpu_device_address.h" #include -#include "device/convert_tensor_utils.h" +#include "runtime/device/convert_tensor_utils.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/cpu/cpu_device_address.h b/mindspore/ccsrc/runtime/device/cpu/cpu_device_address.h similarity index 94% rename from mindspore/ccsrc/device/cpu/cpu_device_address.h rename to mindspore/ccsrc/runtime/device/cpu/cpu_device_address.h index a041567f479..63cf171fa2b 100644 --- a/mindspore/ccsrc/device/cpu/cpu_device_address.h +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_device_address.h @@ -18,7 +18,7 @@ #include #include -#include "device/device_address.h" +#include "runtime/device/device_address.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc similarity index 97% rename from mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc rename to mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc index f46d10ed82a..d2e41a1fbd1 100644 --- a/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/cpu_kernel_runtime.h" +#include "runtime/device/cpu/cpu_kernel_runtime.h" #include #include #include @@ -22,15 +22,15 @@ #include #include #include -#include "kernel/kernel.h" -#include "device/cpu/cpu_device_address.h" +#include "backend/kernel_compiler/kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" #include "utils/context/ms_context.h" #include "utils/config_manager.h" #include "utils/profile.h" #include "common/utils.h" -#include "session/anf_runtime_algorithm.h" -#include "session/session_basic.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/session/session_basic.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.h b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h similarity index 92% rename from mindspore/ccsrc/device/cpu/cpu_kernel_runtime.h rename to mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h index 354d2922c28..a29f840bfd0 100644 --- a/mindspore/ccsrc/device/cpu/cpu_kernel_runtime.h +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h @@ -21,11 +21,11 @@ #include #include #include -#include "device/kernel_runtime.h" -#include "session/kernel_graph.h" -#include "session/session_basic.h" -#include "device/cpu/cpu_resource_manager.h" -#include "session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_runtime.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/session_basic.h" +#include "runtime/device/cpu/cpu_resource_manager.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/any.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/cpu/cpu_resource_manager.cc b/mindspore/ccsrc/runtime/device/cpu/cpu_resource_manager.cc similarity index 97% rename from mindspore/ccsrc/device/cpu/cpu_resource_manager.cc rename to mindspore/ccsrc/runtime/device/cpu/cpu_resource_manager.cc index 
c69ef353055..c607260ab37 100644 --- a/mindspore/ccsrc/device/cpu/cpu_resource_manager.cc +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_resource_manager.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "device/cpu/cpu_resource_manager.h" -#include "session/anf_runtime_algorithm.h" +#include "runtime/device/cpu/cpu_resource_manager.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/cpu/cpu_resource_manager.h b/mindspore/ccsrc/runtime/device/cpu/cpu_resource_manager.h similarity index 90% rename from mindspore/ccsrc/device/cpu/cpu_resource_manager.h rename to mindspore/ccsrc/runtime/device/cpu/cpu_resource_manager.h index d130241464b..d251760dd26 100644 --- a/mindspore/ccsrc/device/cpu/cpu_resource_manager.h +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_resource_manager.h @@ -18,10 +18,10 @@ #include #include -#include "session/kernel_graph.h" -#include "session/session_basic.h" -#include "device/device_address.h" -#include "device/cpu/cpu_simple_mem_plan.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/session_basic.h" +#include "runtime/device/device_address.h" +#include "runtime/device/cpu/cpu_simple_mem_plan.h" namespace mindspore { namespace device { namespace cpu { diff --git a/mindspore/ccsrc/device/cpu/cpu_simple_mem_plan.cc b/mindspore/ccsrc/runtime/device/cpu/cpu_simple_mem_plan.cc similarity index 97% rename from mindspore/ccsrc/device/cpu/cpu_simple_mem_plan.cc rename to mindspore/ccsrc/runtime/device/cpu/cpu_simple_mem_plan.cc index e6cb6ee53a0..7838e669847 100644 --- a/mindspore/ccsrc/device/cpu/cpu_simple_mem_plan.cc +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_simple_mem_plan.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/cpu_simple_mem_plan.h" -#include "session/anf_runtime_algorithm.h" +#include "runtime/device/cpu/cpu_simple_mem_plan.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/cpu/cpu_simple_mem_plan.h b/mindspore/ccsrc/runtime/device/cpu/cpu_simple_mem_plan.h similarity index 94% rename from mindspore/ccsrc/device/cpu/cpu_simple_mem_plan.h rename to mindspore/ccsrc/runtime/device/cpu/cpu_simple_mem_plan.h index 7633ef3f453..123e29fbe50 100644 --- a/mindspore/ccsrc/device/cpu/cpu_simple_mem_plan.h +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_simple_mem_plan.h @@ -18,8 +18,8 @@ #include #include -#include "session/kernel_graph.h" -#include "device/device_address.h" +#include "backend/session/kernel_graph.h" +#include "runtime/device/device_address.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/cpu/kernel_select_cpu.cc b/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc similarity index 98% rename from mindspore/ccsrc/device/cpu/kernel_select_cpu.cc rename to mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc index 9d72bcab890..9528e61ee92 100644 --- a/mindspore/ccsrc/device/cpu/kernel_select_cpu.cc +++ b/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include "device/cpu/kernel_select_cpu.h" +#include "runtime/device/cpu/kernel_select_cpu.h" #include #include #include -#include "kernel/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/cpu/kernel_select_cpu.h b/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.h similarity index 100% rename from mindspore/ccsrc/device/cpu/kernel_select_cpu.h rename to mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.h diff --git a/mindspore/ccsrc/device/cpu/mpi/mpi_adapter.cc b/mindspore/ccsrc/runtime/device/cpu/mpi/mpi_adapter.cc similarity index 99% rename from mindspore/ccsrc/device/cpu/mpi/mpi_adapter.cc rename to mindspore/ccsrc/runtime/device/cpu/mpi/mpi_adapter.cc index 9b06c0a40a4..c124523d593 100644 --- a/mindspore/ccsrc/device/cpu/mpi/mpi_adapter.cc +++ b/mindspore/ccsrc/runtime/device/cpu/mpi/mpi_adapter.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/cpu/mpi/mpi_adapter.h" +#include "runtime/device/cpu/mpi/mpi_adapter.h" #ifdef ENABLE_MPI #include #include diff --git a/mindspore/ccsrc/device/cpu/mpi/mpi_adapter.h b/mindspore/ccsrc/runtime/device/cpu/mpi/mpi_adapter.h similarity index 100% rename from mindspore/ccsrc/device/cpu/mpi/mpi_adapter.h rename to mindspore/ccsrc/runtime/device/cpu/mpi/mpi_adapter.h diff --git a/mindspore/ccsrc/device/cpu/readme.md b/mindspore/ccsrc/runtime/device/cpu/readme.md similarity index 100% rename from mindspore/ccsrc/device/cpu/readme.md rename to mindspore/ccsrc/runtime/device/cpu/readme.md diff --git a/mindspore/ccsrc/device/device_address.h b/mindspore/ccsrc/runtime/device/device_address.h similarity index 91% rename from mindspore/ccsrc/device/device_address.h rename to mindspore/ccsrc/runtime/device/device_address.h index 0447cc25397..32f5fcced9e 100644 --- a/mindspore/ccsrc/device/device_address.h +++ b/mindspore/ccsrc/runtime/device/device_address.h @@ -21,8 +21,7 @@ #include #include #include "ir/dtype.h" - -using std::string; +#include "ir/device_sync.h" namespace mindspore { namespace device { @@ -34,6 +33,7 @@ class CPUKernelRuntime; namespace ascend { class AscendKernelRuntime; class AscendMemoryManager; +class DataDumper; namespace tasksink { class TaskGenerator; } // namespace tasksink @@ -50,20 +50,18 @@ namespace device { enum class DeviceAddressStatus { kInDevice, kInHost, kInDeviceToHost, kInHostToDevice }; enum class DeviceAddressType { kUnknown, kAscend, kCPU, kGPU }; -class DeviceAddress { +class DeviceAddress : public mindspore::DeviceSync { public: explicit DeviceAddress(void *ptr, size_t size) : ptr_(ptr), size_(size) {} explicit DeviceAddress(void *ptr, size_t size, const string &format, TypeId type_id) : ptr_(ptr), size_(size), format_(format), type_id_(type_id) {} virtual ~DeviceAddress() { ptr_ = nullptr; } - virtual bool SyncDeviceToHost(const std::vector &shape, size_t size, TypeId type, void *host_ptr) const = 0; - virtual 
bool SyncHostToDevice(const std::vector &shape, size_t size, TypeId type, - const void *host_ptr) const = 0; const void *GetPtr() const { return ptr_; } size_t GetSize() const { return size_; } std::string format() const { return format_; } TypeId type_id() const { return type_id_; } void set_host_shape(const std::vector &shape) { host_shape_ = shape; } + virtual void UpdateCommunicationAddress() {} virtual void set_status(DeviceAddressStatus status) {} virtual DeviceAddressStatus status() const { return DeviceAddressStatus::kInDevice; } virtual DeviceAddressType DeviceType() const { return DeviceAddressType::kUnknown; } @@ -89,6 +87,7 @@ class DeviceAddress { friend class mindspore::device::gpu::GPUMemoryManager; friend class mindspore::device::ascend::AscendKernelRuntime; friend class mindspore::device::ascend::AscendMemoryManager; + friend class mindspore::device::ascend::DataDumper; }; using DeviceAddressPtr = std::shared_ptr; diff --git a/mindspore/ccsrc/device/gpu/blocking_queue.cc b/mindspore/ccsrc/runtime/device/gpu/blocking_queue.cc similarity index 98% rename from mindspore/ccsrc/device/gpu/blocking_queue.cc rename to mindspore/ccsrc/runtime/device/gpu/blocking_queue.cc index 3b5e75f551d..547c2fbe648 100644 --- a/mindspore/ccsrc/device/gpu/blocking_queue.cc +++ b/mindspore/ccsrc/runtime/device/gpu/blocking_queue.cc @@ -14,9 +14,9 @@ * limitations under the License. 
*/ -#include "device/gpu/blocking_queue.h" +#include "runtime/device/gpu/blocking_queue.h" #include -#include "device/gpu/gpu_common.h" +#include "runtime/device/gpu/gpu_common.h" #include "common/utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/device/gpu/blocking_queue.h b/mindspore/ccsrc/runtime/device/gpu/blocking_queue.h similarity index 100% rename from mindspore/ccsrc/device/gpu/blocking_queue.h rename to mindspore/ccsrc/runtime/device/gpu/blocking_queue.h diff --git a/mindspore/ccsrc/device/gpu/cuda_common.h b/mindspore/ccsrc/runtime/device/gpu/cuda_common.h similarity index 97% rename from mindspore/ccsrc/device/gpu/cuda_common.h rename to mindspore/ccsrc/runtime/device/gpu/cuda_common.h index b79ba8bc281..2689fdbacab 100644 --- a/mindspore/ccsrc/device/gpu/cuda_common.h +++ b/mindspore/ccsrc/runtime/device/gpu/cuda_common.h @@ -18,7 +18,7 @@ #define MINDSPORE_CCSRC_DEVICE_GPU_CUDA_COMMON_H_ #include -#include "device/gpu/gpu_device_manager.h" +#include "runtime/device/gpu/gpu_device_manager.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/gpu/cuda_driver.cc b/mindspore/ccsrc/runtime/device/gpu/cuda_driver.cc similarity index 99% rename from mindspore/ccsrc/device/gpu/cuda_driver.cc rename to mindspore/ccsrc/runtime/device/gpu/cuda_driver.cc index 0dee53df641..1f5e5e3c22a 100644 --- a/mindspore/ccsrc/device/gpu/cuda_driver.cc +++ b/mindspore/ccsrc/runtime/device/gpu/cuda_driver.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "device/gpu/cuda_driver.h" +#include "runtime/device/gpu/cuda_driver.h" #include #include "utils/log_adapter.h" #include "utils/convert_utils.h" diff --git a/mindspore/ccsrc/device/gpu/cuda_driver.h b/mindspore/ccsrc/runtime/device/gpu/cuda_driver.h similarity index 100% rename from mindspore/ccsrc/device/gpu/cuda_driver.h rename to mindspore/ccsrc/runtime/device/gpu/cuda_driver.h diff --git a/mindspore/ccsrc/device/gpu/distribution/collective_common.h b/mindspore/ccsrc/runtime/device/gpu/distribution/collective_common.h similarity index 71% rename from mindspore/ccsrc/device/gpu/distribution/collective_common.h rename to mindspore/ccsrc/runtime/device/gpu/distribution/collective_common.h index f9564a0c747..5373f21d70c 100644 --- a/mindspore/ccsrc/device/gpu/distribution/collective_common.h +++ b/mindspore/ccsrc/runtime/device/gpu/distribution/collective_common.h @@ -23,16 +23,17 @@ namespace mindspore { namespace device { namespace gpu { -#define MAX_HOSTNAME_LEN 1024 -#define CHECK_RET(expression, result, message) \ - { \ - auto ret = (expression); \ - if (ret != result) { \ - std::ostringstream oss; \ - oss << "Error in file " << __FILE__ << " | Error on line " << __LINE__ << " | GPU collective Error " << message \ - << " | Error Number " << ret; \ - pybind11::pybind11_fail(oss.str()); \ - } \ +constexpr int MAX_HOSTNAME_LEN = 1024; +constexpr char NCCL_WORLD_GROUP[] = "nccl_world_group"; +#define CHECK_RET(expression, result, message) \ + { \ + auto ret = (expression); \ + if (ret != result) { \ + std::ostringstream oss; \ + oss << "Error in file " << __FILE__ << " | Error on line " << __LINE__ << " | GPU collective Error: " << message \ + << " | Error Number " << ret; \ + pybind11::pybind11_fail(oss.str()); \ + } \ } } // namespace gpu } // namespace device diff --git a/mindspore/ccsrc/device/gpu/distribution/collective_fake_init.cc b/mindspore/ccsrc/runtime/device/gpu/distribution/collective_fake_init.cc similarity index 93% rename from 
mindspore/ccsrc/device/gpu/distribution/collective_fake_init.cc rename to mindspore/ccsrc/runtime/device/gpu/distribution/collective_fake_init.cc index 06497a2e82c..80793042fdc 100644 --- a/mindspore/ccsrc/device/gpu/distribution/collective_fake_init.cc +++ b/mindspore/ccsrc/runtime/device/gpu/distribution/collective_fake_init.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "device/gpu/distribution/collective_fake_init.h" +#include "runtime/device/gpu/distribution/collective_fake_init.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/device/gpu/distribution/collective_fake_init.h b/mindspore/ccsrc/runtime/device/gpu/distribution/collective_fake_init.h similarity index 100% rename from mindspore/ccsrc/device/gpu/distribution/collective_fake_init.h rename to mindspore/ccsrc/runtime/device/gpu/distribution/collective_fake_init.h diff --git a/mindspore/ccsrc/device/gpu/distribution/collective_init.cc b/mindspore/ccsrc/runtime/device/gpu/distribution/collective_init.cc similarity index 97% rename from mindspore/ccsrc/device/gpu/distribution/collective_init.cc rename to mindspore/ccsrc/runtime/device/gpu/distribution/collective_init.cc index d7ab95bbe84..cba789b38d2 100644 --- a/mindspore/ccsrc/device/gpu/distribution/collective_init.cc +++ b/mindspore/ccsrc/runtime/device/gpu/distribution/collective_init.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "device/gpu/distribution/collective_init.h" +#include "runtime/device/gpu/distribution/collective_init.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/device/gpu/distribution/collective_init.h b/mindspore/ccsrc/runtime/device/gpu/distribution/collective_init.h similarity index 84% rename from mindspore/ccsrc/device/gpu/distribution/collective_init.h rename to mindspore/ccsrc/runtime/device/gpu/distribution/collective_init.h index 424abcf4700..464492d50f8 100644 --- a/mindspore/ccsrc/device/gpu/distribution/collective_init.h +++ b/mindspore/ccsrc/runtime/device/gpu/distribution/collective_init.h @@ -18,6 +18,8 @@ #define MINDSPORE_CCSRC_DEVICE_GPU_DISTRIBUTION_COLLECTIVE_INIT_H_ #include +#include +#include namespace mindspore { namespace device { @@ -25,6 +27,10 @@ namespace gpu { using InitMPI = void (*)(); using InitNCCLComm = void (*)(); using GetLocalRankId = int (*)(); +using CreateCommGroupFunc = bool (*)(const std::string &, const std::vector &); +using GetRankIDByGroupFunc = int (*)(const std::string &); +using GetGroupSizeFunc = int (*)(const std::string &); +using DestroyGroupFunc = bool (*)(const std::string &); class CollectiveInitializer { public: diff --git a/mindspore/ccsrc/device/gpu/distribution/collective_wrapper.cc b/mindspore/ccsrc/runtime/device/gpu/distribution/collective_wrapper.cc similarity index 75% rename from mindspore/ccsrc/device/gpu/distribution/collective_wrapper.cc rename to mindspore/ccsrc/runtime/device/gpu/distribution/collective_wrapper.cc index 5fb0f748492..f427905afa1 100644 --- a/mindspore/ccsrc/device/gpu/distribution/collective_wrapper.cc +++ b/mindspore/ccsrc/runtime/device/gpu/distribution/collective_wrapper.cc @@ -20,8 +20,9 @@ #include #include #include -#include "device/gpu/distribution/mpi_wrapper.h" -#include "device/gpu/distribution/nccl_wrapper.h" +#include +#include "runtime/device/gpu/distribution/mpi_wrapper.h" +#include 
"runtime/device/gpu/distribution/nccl_wrapper.h" #ifndef EXPORT_WRAPPER #define EXPORT_WRAPPER __attribute__((visibility("default"))) @@ -36,6 +37,22 @@ extern "C" EXPORT_WRAPPER int local_rank_id() { return MPIWrapper::instance().lo extern "C" EXPORT_WRAPPER void InitNCCLComm() { NCCLWrapper::instance().InitNCCLComm(); } +extern "C" EXPORT_WRAPPER bool CreateCommGroup(const std::string &group_name, const std::vector &ranks) { + return MPIWrapper::instance().CreateCommGroup(group_name, ranks); +} + +extern "C" EXPORT_WRAPPER int GetRankIDByGroup(const std::string &group_name) { + return MPIWrapper::instance().GetRankIDByGroup(group_name); +} + +extern "C" EXPORT_WRAPPER int GetGroupSize(const std::string &group_name) { + return MPIWrapper::instance().GetGroupSize(group_name); +} + +extern "C" EXPORT_WRAPPER bool DestroyGroup(const std::string &group_name) { + return MPIWrapper::instance().DestroyGroup(group_name); +} + extern "C" EXPORT_WRAPPER ncclResult_t AllReduce(const void *input_addr, void *output_addr, size_t count, ncclDataType_t data_type, ncclRedOp_t reduce_type, cudaStream_t stream) { diff --git a/mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.cc b/mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.cc new file mode 100644 index 00000000000..08ec320cab8 --- /dev/null +++ b/mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.cc @@ -0,0 +1,156 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "runtime/device/gpu/distribution/mpi_wrapper.h" +#include +#include +#include +#include "runtime/device/gpu/distribution/nccl_wrapper.h" + +namespace mindspore { +namespace device { +namespace gpu { +MPIWrapper::MPIWrapper() : rank_id_(0), rank_size_(0), local_rank_id_(0) { Init(); } + +MPIWrapper::~MPIWrapper() { + int finalized; + MPI_Finalized(&finalized); + if (finalized == 0) { + MPI_Finalize(); + } +} + +MPIWrapper &MPIWrapper::instance() { + static MPIWrapper instance; + return instance; +} + +int MPIWrapper::local_rank_id() const { return local_rank_id_; } + +bool MPIWrapper::CreateCommGroup(const std::string &group_name, const std::vector &group_ranks) { + std::vector ranks(group_ranks.begin(), group_ranks.end()); + MPI_Group mpi_group; + CHECK_RET(MPI_Group_incl(world_group_, ranks.size(), ranks.data(), &mpi_group), MPI_SUCCESS, + "Failed to produce a new group from MPI_COMM_WORLD group for " + group_name); + SetGroupNameToMPIGroup(group_name, mpi_group); + + MPI_Comm mpi_group_comm; + CHECK_RET(MPI_Comm_create(MPI_COMM_WORLD, mpi_group, &mpi_group_comm), MPI_SUCCESS, + "Failed to create MPI communicator."); + if (mpi_group_comm == MPI_COMM_NULL) { + return false; + } + + ncclUniqueId group_unique_id; + if (rank_id_ == ranks[0]) { + group_unique_id = NCCLWrapper::instance().nccl_unique_id(); + } + MPI_Bcast(&group_unique_id, sizeof(ncclUniqueId), MPI_BYTE, ranks[0], mpi_group_comm); + + int group_rank[1]; + int global_rank[1] = {rank_id_}; + CHECK_RET(MPI_Group_translate_ranks(world_group_, 1, global_rank, mpi_group, group_rank), MPI_SUCCESS, + "Failed to translate global rank to group rank."); + if (group_rank[0] == MPI_UNDEFINED) { + return false; + } + + ncclComm_t nccl_group_comm; + NCCLWrapper::instance().InitNCCLComm(&nccl_group_comm, ranks.size(), group_unique_id, group_rank[0]); + 
NCCLWrapper::instance().SetGroupNameToNCCLComm(group_name, nccl_group_comm); + return true; +} + +int MPIWrapper::GetRankIDByGroup(const std::string &group_name) { + CHECK_RET(group_name_to_mpi_group_map_.count(group_name), 1, "Failed to get MPI group by group name " + group_name); + MPI_Group mpi_group = group_name_to_mpi_group_map_[group_name]; + int rank; + CHECK_RET(MPI_Group_rank(mpi_group, &rank), MPI_SUCCESS, "Failed to get rank id by group name." + group_name); + return rank; +} + +int MPIWrapper::GetGroupSize(const std::string &group_name) { + CHECK_RET(group_name_to_mpi_group_map_.count(group_name), 1, "Failed to get MPI group by group name" + group_name); + MPI_Group mpi_group = group_name_to_mpi_group_map_[group_name]; + int size; + CHECK_RET(MPI_Group_size(mpi_group, &size), MPI_SUCCESS, "Failed to get group size by group name." + group_name); + return size; +} + +bool MPIWrapper::DestroyGroup(const std::string &group_name) { + auto group_iter = group_name_to_mpi_group_map_.find(group_name); + if (group_iter == group_name_to_mpi_group_map_.end()) { + return false; + } + group_name_to_mpi_group_map_.erase(group_name); + MPI_Group mpi_group = group_iter->second; + CHECK_RET(MPI_Group_free(&mpi_group), MPI_SUCCESS, "Failed to free MPI group for " + group_name); + NCCLWrapper::instance().DestroyGroup(group_name); + return true; +} + +void MPIWrapper::Init() { + int initialized; + CHECK_RET(MPI_Initialized(&initialized), MPI_SUCCESS, "Failed to check mpi initialization status."); + if (initialized == 0) { + MPI_Init(nullptr, nullptr); + } + + CHECK_RET(MPI_Comm_rank(MPI_COMM_WORLD, &rank_id_), MPI_SUCCESS, "Failed to init mpi rank id."); + CHECK_RET(MPI_Comm_size(MPI_COMM_WORLD, &rank_size_), MPI_SUCCESS, "Failed to init mpi rank size."); + NCCLWrapper::instance().set_rank(rank_id_, rank_size_); + AssignLocalRankID(); + + CHECK_RET(MPI_Comm_group(MPI_COMM_WORLD, &world_group_), MPI_SUCCESS, "Failed to get group of MPI_COMM_WORLD"); + 
SetGroupNameToMPIGroup(NCCL_WORLD_GROUP, world_group_); + + ncclUniqueId unique_id; + if (rank_id_ == 0) { + unique_id = NCCLWrapper::instance().nccl_unique_id(); + } + CHECK_RET(MPI_Bcast(reinterpret_cast(&unique_id), sizeof(unique_id), MPI_BYTE, 0, MPI_COMM_WORLD), + MPI_SUCCESS, "Failed to broadcast nccl unique id."); + NCCLWrapper::instance().set_nccl_unique_id(unique_id); + return; +} + +void MPIWrapper::AssignLocalRankID() { + char host_name[MAX_HOSTNAME_LEN] = {0}; + CHECK_RET(gethostname(host_name, MAX_HOSTNAME_LEN), 0, "Getting host name failed."); + size_t host_hash = std::hash()(host_name); + + const int kRankSize = rank_size_; + size_t all_host_hashs[kRankSize]; + all_host_hashs[rank_id_] = host_hash; + CHECK_RET(MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, all_host_hashs, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD), + MPI_SUCCESS, "MPI_Allgather host hashs failed."); + for (int global_rank = 0; global_rank < kRankSize; global_rank++) { + if (global_rank == rank_id_) { + break; + } + if (all_host_hashs[global_rank] == all_host_hashs[rank_id_]) { + local_rank_id_++; + } + } + return; +} + +void MPIWrapper::SetGroupNameToMPIGroup(const std::string &group_name, const MPI_Group mpi_group) { + group_name_to_mpi_group_map_[group_name] = mpi_group; +} +} // namespace gpu +} // namespace device +} // namespace mindspore diff --git a/mindspore/ccsrc/device/gpu/distribution/mpi_wrapper.h b/mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.h similarity index 69% rename from mindspore/ccsrc/device/gpu/distribution/mpi_wrapper.h rename to mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.h index 6dfedea9220..19d06b32d32 100644 --- a/mindspore/ccsrc/device/gpu/distribution/mpi_wrapper.h +++ b/mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.h @@ -22,7 +22,10 @@ #include #include #include -#include "device/gpu/distribution/collective_common.h" +#include +#include +#include +#include 
"runtime/device/gpu/distribution/collective_common.h" namespace mindspore { namespace device { @@ -33,16 +36,23 @@ class MPIWrapper { MPIWrapper &operator=(const MPIWrapper &) = delete; static MPIWrapper &instance(); int local_rank_id() const; + bool CreateCommGroup(const std::string &group_name, const std::vector &ranks); + int GetRankIDByGroup(const std::string &group_name); + int GetGroupSize(const std::string &group_name); + bool DestroyGroup(const std::string &group_name); private: MPIWrapper(); ~MPIWrapper(); void Init(); - void AssignLocalRankId(); + void AssignLocalRankID(); + void SetGroupNameToMPIGroup(const std::string &group_name, const MPI_Group mpi_group); int rank_id_; int rank_size_; int local_rank_id_; + MPI_Group world_group_; + std::map group_name_to_mpi_group_map_; }; } // namespace gpu } // namespace device diff --git a/mindspore/ccsrc/device/gpu/distribution/nccl_wrapper.cc b/mindspore/ccsrc/runtime/device/gpu/distribution/nccl_wrapper.cc similarity index 52% rename from mindspore/ccsrc/device/gpu/distribution/nccl_wrapper.cc rename to mindspore/ccsrc/runtime/device/gpu/distribution/nccl_wrapper.cc index aa4756a69f7..bcba5383094 100644 --- a/mindspore/ccsrc/device/gpu/distribution/nccl_wrapper.cc +++ b/mindspore/ccsrc/runtime/device/gpu/distribution/nccl_wrapper.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "device/gpu/distribution/nccl_wrapper.h" +#include "runtime/device/gpu/distribution/nccl_wrapper.h" namespace mindspore { namespace device { @@ -40,21 +40,51 @@ void NCCLWrapper::set_rank(int rank_id, int rank_size) { void NCCLWrapper::InitNCCLComm() { CHECK_RET(ncclCommInitRank(&comm_, rank_size_, unique_id_, rank_id_), ncclSuccess, "Failed to init nccl communicator."); + group_to_comm_map_[NCCL_WORLD_GROUP] = comm_; +} + +void NCCLWrapper::InitNCCLComm(ncclComm_t *comm, int rank_size, ncclUniqueId unique_id, int rank) { + CHECK_RET(ncclCommInitRank(comm, rank_size, unique_id, rank), ncclSuccess, "Failed to init nccl communicator."); } ncclResult_t NCCLWrapper::AllReduce(const void *input_addr, void *output_addr, size_t count, ncclDataType_t data_type, - ncclRedOp_t reduce_type, cudaStream_t stream) { - return ncclAllReduce(input_addr, output_addr, count, data_type, reduce_type, comm_, stream); + ncclRedOp_t reduce_type, cudaStream_t stream, const std::string &group_name) { + CHECK_RET(group_to_comm_map_.count(group_name), 1, + "Failed to find NCCL communicator for AllReduce by the group name " + group_name); + ncclComm_t group_comm = group_to_comm_map_[group_name]; + return ncclAllReduce(input_addr, output_addr, count, data_type, reduce_type, group_comm, stream); } ncclResult_t NCCLWrapper::AllGather(const void *input_addr, void *output_addr, size_t count, ncclDataType_t data_type, - cudaStream_t stream) { - return ncclAllGather(input_addr, output_addr, count, data_type, comm_, stream); + cudaStream_t stream, const std::string &group_name) { + CHECK_RET(group_to_comm_map_.count(group_name), 1, + "Failed to find NCCL communicator for AllGather by the group name " + group_name); + ncclComm_t group_comm = group_to_comm_map_[group_name]; + return ncclAllGather(input_addr, output_addr, count, data_type, group_comm, stream); } ncclResult_t NCCLWrapper::ReduceScatter(const void *input_addr, void *output_addr, size_t count, - ncclDataType_t data_type, 
ncclRedOp_t reduce_type, cudaStream_t stream) { - return ncclReduceScatter(input_addr, output_addr, count, data_type, reduce_type, comm_, stream); + ncclDataType_t data_type, ncclRedOp_t reduce_type, cudaStream_t stream, + const std::string &group_name) { + CHECK_RET(group_to_comm_map_.count(group_name), 1, + "Failed to find NCCL communicator for ReduceScatter by the group name " + group_name); + ncclComm_t group_comm = group_to_comm_map_[group_name]; + return ncclReduceScatter(input_addr, output_addr, count, data_type, reduce_type, group_comm, stream); +} + +void NCCLWrapper::SetGroupNameToNCCLComm(const std::string &group_name, const ncclComm_t comm) { + group_to_comm_map_[group_name] = comm; +} + +void NCCLWrapper::DestroyGroup(const std::string &group_name) { + auto group_iter = group_to_comm_map_.find(group_name); + if (group_iter == group_to_comm_map_.end()) { + return; + } + group_to_comm_map_.erase(group_iter); + ncclComm_t group_comm = group_iter->second; + CHECK_RET(ncclCommDestroy(group_comm), ncclSuccess, "Failed to destroy NCCL communicator for " + group_name); + return; } } // namespace gpu } // namespace device diff --git a/mindspore/ccsrc/device/gpu/distribution/nccl_wrapper.h b/mindspore/ccsrc/runtime/device/gpu/distribution/nccl_wrapper.h similarity index 74% rename from mindspore/ccsrc/device/gpu/distribution/nccl_wrapper.h rename to mindspore/ccsrc/runtime/device/gpu/distribution/nccl_wrapper.h index 5df1e63bb8b..9cea338c413 100644 --- a/mindspore/ccsrc/device/gpu/distribution/nccl_wrapper.h +++ b/mindspore/ccsrc/runtime/device/gpu/distribution/nccl_wrapper.h @@ -20,7 +20,9 @@ #include #include #include -#include "device/gpu/distribution/collective_common.h" +#include +#include +#include "runtime/device/gpu/distribution/collective_common.h" namespace mindspore { namespace device { @@ -34,12 +36,15 @@ class NCCLWrapper { void set_nccl_unique_id(ncclUniqueId unique_id); void set_rank(int rank_id, int rank_size); void InitNCCLComm(); + void 
InitNCCLComm(ncclComm_t *comm, int rank_size, ncclUniqueId unique_id, int rank); ncclResult_t AllReduce(const void *input_addr, void *output_addr, size_t count, ncclDataType_t datatype, - ncclRedOp_t op, cudaStream_t stream); + ncclRedOp_t op, cudaStream_t stream, const std::string &group_name = NCCL_WORLD_GROUP); ncclResult_t AllGather(const void *input_addr, void *output_addr, size_t count, ncclDataType_t datatype, - cudaStream_t stream); + cudaStream_t stream, const std::string &group_name = NCCL_WORLD_GROUP); ncclResult_t ReduceScatter(const void *input_addr, void *output_addr, size_t count, ncclDataType_t datatype, - ncclRedOp_t op, cudaStream_t stream); + ncclRedOp_t op, cudaStream_t stream, const std::string &group_name = NCCL_WORLD_GROUP); + void SetGroupNameToNCCLComm(const std::string &group_name, const ncclComm_t comm); + void DestroyGroup(const std::string &group_name); private: NCCLWrapper() : rank_id_(-1), rank_size_(0) {} @@ -50,6 +55,7 @@ class NCCLWrapper { int rank_size_; ncclUniqueId unique_id_; ncclComm_t comm_; + std::map group_to_comm_map_; }; } // namespace gpu } // namespace device diff --git a/mindspore/ccsrc/device/gpu/gpu_buffer_mgr.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.cc similarity index 99% rename from mindspore/ccsrc/device/gpu/gpu_buffer_mgr.cc rename to mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.cc index 621ba557e52..a1b1fa9b79c 100644 --- a/mindspore/ccsrc/device/gpu/gpu_buffer_mgr.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "device/gpu/gpu_buffer_mgr.h" +#include "runtime/device/gpu/gpu_buffer_mgr.h" #include #include #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/device/gpu/gpu_buffer_mgr.h b/mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.h similarity index 98% rename from mindspore/ccsrc/device/gpu/gpu_buffer_mgr.h rename to mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.h index 5ce4a2cbdce..722a36c4ed0 100644 --- a/mindspore/ccsrc/device/gpu/gpu_buffer_mgr.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.h @@ -25,7 +25,7 @@ #include #include #include -#include "device/gpu/blocking_queue.h" +#include "runtime/device/gpu/blocking_queue.h" #define EXPORT __attribute__((visibility("default"))) diff --git a/mindspore/ccsrc/device/gpu/gpu_common.h b/mindspore/ccsrc/runtime/device/gpu/gpu_common.h similarity index 100% rename from mindspore/ccsrc/device/gpu/gpu_common.h rename to mindspore/ccsrc/runtime/device/gpu/gpu_common.h diff --git a/mindspore/ccsrc/device/gpu/gpu_device_address.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc similarity index 93% rename from mindspore/ccsrc/device/gpu/gpu_device_address.cc rename to mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc index 401eb9f34e6..a20a6a9a3c8 100644 --- a/mindspore/ccsrc/device/gpu/gpu_device_address.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#include "device/gpu/gpu_device_address.h" +#include "runtime/device/gpu/gpu_device_address.h" #include -#include "device/gpu/gpu_device_manager.h" +#include "runtime/device/gpu/gpu_device_manager.h" #include "utils/log_adapter.h" -#include "device/gpu/gpu_memory_allocator.h" +#include "runtime/device/gpu/gpu_memory_allocator.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/gpu/gpu_device_address.h b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.h similarity index 95% rename from mindspore/ccsrc/device/gpu/gpu_device_address.h rename to mindspore/ccsrc/runtime/device/gpu/gpu_device_address.h index 4074cb6ce90..ade738deed4 100644 --- a/mindspore/ccsrc/device/gpu/gpu_device_address.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.h @@ -19,7 +19,7 @@ #include #include -#include "device/device_address.h" +#include "runtime/device/device_address.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/gpu/gpu_device_manager.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.cc similarity index 94% rename from mindspore/ccsrc/device/gpu/gpu_device_manager.cc rename to mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.cc index 9f5f37c606a..8f17fc20b5b 100644 --- a/mindspore/ccsrc/device/gpu/gpu_device_manager.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.cc @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#include "device/gpu/gpu_device_manager.h" -#include "device/gpu/gpu_common.h" +#include "runtime/device/gpu/gpu_device_manager.h" +#include "runtime/device/gpu/gpu_common.h" #include "utils/log_adapter.h" #include "utils/convert_utils.h" -#include "device/gpu/gpu_buffer_mgr.h" +#include "runtime/device/gpu/gpu_buffer_mgr.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/gpu/gpu_device_manager.h b/mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.h similarity index 93% rename from mindspore/ccsrc/device/gpu/gpu_device_manager.h rename to mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.h index b6b630181eb..002806675ce 100644 --- a/mindspore/ccsrc/device/gpu/gpu_device_manager.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.h @@ -21,8 +21,8 @@ #include #include #include -#include "device/gpu/cuda_driver.h" -#include "device/gpu/gpu_memory_allocator.h" +#include "runtime/device/gpu/cuda_driver.h" +#include "runtime/device/gpu/gpu_memory_allocator.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc similarity index 85% rename from mindspore/ccsrc/device/gpu/gpu_kernel_build.cc rename to mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc index 19d22845103..9d88a205bc7 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc @@ -13,14 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/gpu/gpu_kernel_build.h" +#include "runtime/device/gpu/gpu_kernel_build.h" #include -#include "kernel/kernel.h" -#include "kernel/akg/akg_kernel_build.h" -#include "kernel/akg/gpu/akg_gpu_kernel_build.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "operator/ops.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/kernel.h" +#include "backend/kernel_compiler/akg/akg_kernel_build.h" +#include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "frontend/operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace device { namespace gpu { diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_build.h b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.h similarity index 95% rename from mindspore/ccsrc/device/gpu/gpu_kernel_build.h rename to mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.h index 5770e4d3b18..831c4e95110 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_build.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.h @@ -17,7 +17,7 @@ #define MINDSPORE_CCSRC_DEVICE_GPU_GPUKERNELBUILD_H_ #include -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" namespace mindspore { namespace device { namespace gpu { diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc similarity index 80% rename from mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc rename to mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc index ad0e093d7ff..ddf73841b77 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc @@ -14,21 +14,21 @@ * limitations under the License. 
*/ -#include "device/gpu/gpu_kernel_runtime.h" -#include "device/gpu/gpu_device_address.h" -#include "device/gpu/cuda_driver.h" -#include "device/gpu/gpu_buffer_mgr.h" -#include "device/gpu/gpu_device_manager.h" -#include "device/gpu/gpu_memory_allocator.h" -#include "device/gpu/distribution/collective_init.h" +#include "runtime/device/gpu/gpu_kernel_runtime.h" +#include "runtime/device/gpu/gpu_device_address.h" +#include "runtime/device/gpu/cuda_driver.h" +#include "runtime/device/gpu/gpu_buffer_mgr.h" +#include "runtime/device/gpu/gpu_device_manager.h" +#include "runtime/device/gpu/gpu_memory_allocator.h" +#include "runtime/device/gpu/distribution/collective_init.h" #include "utils/convert_utils.h" #include "utils/context/ms_context.h" -#include "device/kernel_runtime_manager.h" -#include "device/gpu/gpu_common.h" +#include "runtime/device/kernel_runtime_manager.h" +#include "runtime/device/gpu/gpu_common.h" #include "common/utils.h" -#include "device/gpu/gpu_memory_manager.h" -#include "kernel/common_utils.h" -#include "device/gpu/gpu_memory_copy_manager.h" +#include "runtime/device/gpu/gpu_memory_manager.h" +#include "backend/kernel_compiler/common_utils.h" +#include "runtime/device/gpu/gpu_memory_copy_manager.h" namespace mindspore { namespace device { @@ -137,6 +137,7 @@ void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) { if (is_enable_dynamic_mem) { // Use the dynamic memory pool. 
InitKernelRefCount(graph); + InitMemorySwapInfo(graph); InitKernelOutputAddress(graph); } else { AssignDynamicMemory(graph); @@ -144,27 +145,24 @@ void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) { } bool GPUKernelRuntime::Run(session::KernelGraph *graph) { + struct timeval start_time, end_time; + (void)gettimeofday(&start_time, nullptr); bool ret = true; auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); bool is_enable_dynamic_mem = context_ptr->enable_dynamic_mem_pool(); bool is_enable_pynative_infer = context_ptr->enable_pynative_infer(); - auto iter = mem_swap_map_.find(graph); - if (iter == mem_swap_map_.end()) { - GPUMemCopyManagerPtr gpu_mem_copy_manager = std::make_shared(); - iter = mem_swap_map_.emplace(graph, std::make_shared(gpu_mem_copy_manager)).first; - } - mem_swap_manager_ = iter->second; - MS_EXCEPTION_IF_NULL(mem_swap_manager_); - struct timeval start_time, end_time; - (void)gettimeofday(&start_time, nullptr); if (is_enable_dynamic_mem && !is_enable_pynative_infer) { + auto graph_id = graph->graph_id(); + auto iter = mem_swap_map_.find(graph_id); + if (iter == mem_swap_map_.end()) { + MS_LOG(EXCEPTION) << "Find memory swap map failed."; + } + mem_swap_manager_ = iter->second; + MS_EXCEPTION_IF_NULL(mem_swap_manager_); while (!LaunchKernelDynamic(graph)) { - ClearKernelOutputAddress(graph); - if (!mem_swap_manager_->mem_swap_init()) { - mem_swap_manager_->Init(graph); - } - if (!mem_swap_manager_->RetreatSwapInfo()) { + MS_LOG(WARNING) << "Run out of memory and try memory swapping, it may take some time, please wait a moment."; + if (!UpdateMemorySwapInfo(graph)) { return false; } } @@ -197,6 +195,16 @@ void GPUKernelRuntime::InitKernelRefCount(const session::KernelGraph *graph) { mem_reuse_util_map_[graph_id] = mem_reuse_util_ptr; } +void GPUKernelRuntime::InitMemorySwapInfo(const session::KernelGraph *graph) { + MS_EXCEPTION_IF_NULL(graph); + GPUMemCopyManagerPtr gpu_mem_copy_manager = 
std::make_shared(); + MS_EXCEPTION_IF_NULL(gpu_mem_copy_manager); + MemSwapManagerPtr mem_swap_manager = std::make_shared(gpu_mem_copy_manager); + MS_EXCEPTION_IF_NULL(mem_swap_manager); + auto graph_id = graph->graph_id(); + mem_swap_map_[graph_id] = mem_swap_manager; +} + void GPUKernelRuntime::InitKernelOutputAddress(const session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); auto &kernels = graph->execution_order(); @@ -227,7 +235,6 @@ void GPUKernelRuntime::ClearKernelOutputAddress(const session::KernelGraph *grap if (!AnfAlgo::OutputAddrExist(kernel, i)) { continue; } - auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, i, false); if (device_address->ptr_) { mem_manager_->FreeMemFromMemPool(device_address); @@ -239,9 +246,12 @@ void GPUKernelRuntime::ClearKernelOutputAddress(const session::KernelGraph *grap bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(mem_swap_manager_); auto graph_id = graph->graph_id(); - auto mem_reuse_util_ptr = mem_reuse_util_map_[graph_id]; + auto iter = mem_reuse_util_map_.find(graph_id); + if (iter == mem_reuse_util_map_.end()) { + MS_LOG(EXCEPTION) << "Find memory reuse map failed."; + } + auto mem_reuse_util_ptr = iter->second; MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr); // Reset the reference count. 
mem_reuse_util_ptr->ResetDynamicUsedRefCount(); @@ -263,27 +273,14 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph) { MS_LOG(EXCEPTION) << "Launch kernel failed."; } FreeKernelDynamicRes(kernel, kernel_workspaces, graph_id); - - if (mem_swap_manager_->trigger_swap() && mem_swap_manager_->QueryKernelTriggerSwap(kernel)) { - CHECK_OP_RET_WITH_EXCEPT(SyncStream(), "SyncStream failed."); - if (!AddMemSwapTask(kernel)) { - return false; - } - } - - if (mem_swap_manager_->trigger_swap()) { - mem_swap_manager_->SyncMemCopyStream(SwapKind::kDeviceToHost); - } + UpdateMemorySwapTask(kernel); } - CHECK_OP_RET_WITH_EXCEPT(SyncStream(), "SyncStream failed."); - if (mem_swap_manager_->trigger_swap()) { - mem_swap_manager_->ClearSwapQueue(); - } + ClearSwapQueue(); return true; } -bool GPUKernelRuntime::AddMemSwapTask(const AnfNodePtr &kernel) { +bool GPUKernelRuntime::AddMemorySwapTask(const AnfNodePtr &kernel) { MS_EXCEPTION_IF_NULL(mem_swap_manager_); auto &mem_swap_info_list = mem_swap_manager_->QueryKernelMemSwapInfo(kernel); for (auto &mem_swap_info : mem_swap_info_list) { @@ -311,14 +308,92 @@ bool GPUKernelRuntime::AddMemSwapTask(const AnfNodePtr &kernel) { return true; } +bool GPUKernelRuntime::UpdateMemorySwapInfo(const session::KernelGraph *graph) { + MS_EXCEPTION_IF_NULL(mem_swap_manager_); + ClearKernelOutputAddress(graph); + if (!mem_swap_manager_->mem_swap_init()) { + mem_swap_manager_->Init(graph); + } + return mem_swap_manager_->RetreatSwapInfo(); +} + +bool GPUKernelRuntime::UpdateMemorySwapTask(const AnfNodePtr &kernel) { + MS_EXCEPTION_IF_NULL(mem_swap_manager_); + if (!mem_swap_manager_->trigger_swap()) { + return true; + } + if (mem_swap_manager_->QueryKernelTriggerSwap(kernel)) { + CHECK_OP_RET_WITH_EXCEPT(SyncStream(), "SyncStream failed."); + if (!AddMemorySwapTask(kernel)) { + return false; + } + } + CHECK_OP_RET_WITH_EXCEPT(mem_swap_manager_->SyncMemCopyStream(SwapKind::kDeviceToHost), "SyncCopyStream failed."); + return 
true; +} + +void GPUKernelRuntime::UpdateHostSwapQueue(const DeviceAddressPtr device_address) { + MS_EXCEPTION_IF_NULL(mem_swap_manager_); + if (!mem_swap_manager_->trigger_swap()) { + return; + } + while (auto device_address_swap_in = mem_swap_manager_->UpdateSwapQueue(SwapKind::kHostToDevice)) { + device_address_swap_in->set_status(DeviceAddressStatus::kInDevice); + } + auto status = device_address->status(); + switch (status) { + case DeviceAddressStatus::kInDevice: + break; + case DeviceAddressStatus::kInDeviceToHost: { + mem_swap_manager_->InsertSwapInBlackList(device_address->ptr_); + device_address->set_status(DeviceAddressStatus::kInDevice); + break; + } + case DeviceAddressStatus::kInHostToDevice: { + while (device_address->status() != DeviceAddressStatus::kInDevice) { + while (auto device_address_swap_in = mem_swap_manager_->UpdateSwapQueue(SwapKind::kHostToDevice)) { + device_address_swap_in->set_status(DeviceAddressStatus::kInDevice); + } + } + break; + } + case DeviceAddressStatus::kInHost: + MS_LOG(ERROR) << "Invaild device address status:" << status; + break; + default: + MS_LOG(EXCEPTION) << "Invaild device address status:" << status; + } +} + +void GPUKernelRuntime::UpdateDeviceSwapQueue() { + MS_EXCEPTION_IF_NULL(mem_swap_manager_); + if (!mem_swap_manager_->trigger_swap()) { + return; + } + while (auto device_address_swap_out = mem_swap_manager_->UpdateSwapQueue(SwapKind::kDeviceToHost)) { + if (!mem_swap_manager_->FindInSwapInBlackList(device_address_swap_out->ptr_) && device_address_swap_out->ptr_) { + device_address_swap_out->set_status(DeviceAddressStatus::kInHost); + mem_manager_->FreeMemFromMemPool(device_address_swap_out); + } + } +} + +void GPUKernelRuntime::ClearSwapQueue() { + MS_EXCEPTION_IF_NULL(mem_swap_manager_); + if (!mem_swap_manager_->trigger_swap()) { + return; + } + mem_swap_manager_->ClearSwapQueue(); +} + bool GPUKernelRuntime::AttemptMallocMem(const DeviceAddressPtr &device_address, size_t size) { 
MS_EXCEPTION_IF_NULL(mem_manager_); + MS_EXCEPTION_IF_NULL(mem_swap_manager_); auto ret = mem_manager_->MallocMemFromMemPool(device_address, size); if (!ret) { if (!mem_swap_manager_->trigger_swap()) { return false; } - mem_swap_manager_->SyncMemCopyStream(SwapKind::kDeviceToHost); while (auto device_address_swap_out = mem_swap_manager_->UpdateSwapQueue(SwapKind::kDeviceToHost)) { if (!mem_swap_manager_->FindInSwapInBlackList(device_address_swap_out->ptr_) && device_address_swap_out->ptr_) { @@ -326,7 +401,6 @@ bool GPUKernelRuntime::AttemptMallocMem(const DeviceAddressPtr &device_address, mem_manager_->FreeMemFromMemPool(device_address_swap_out); } } - ret = mem_manager_->MallocMemFromMemPool(device_address, size); if (!ret) { return false; @@ -337,12 +411,12 @@ bool GPUKernelRuntime::AttemptMallocMem(const DeviceAddressPtr &device_address, void *GPUKernelRuntime::AttemptMallocMem(size_t size) { MS_EXCEPTION_IF_NULL(mem_manager_); + MS_EXCEPTION_IF_NULL(mem_swap_manager_); auto device_ptr = mem_manager_->MallocMemFromMemPool(size); if (!device_ptr) { if (!mem_swap_manager_->trigger_swap()) { return nullptr; } - mem_swap_manager_->SyncMemCopyStream(SwapKind::kDeviceToHost); while (auto device_address_swap_out = mem_swap_manager_->UpdateSwapQueue(SwapKind::kDeviceToHost)) { if (!mem_swap_manager_->FindInSwapInBlackList(device_address_swap_out->ptr_) && device_address_swap_out->ptr_) { @@ -350,7 +424,6 @@ void *GPUKernelRuntime::AttemptMallocMem(size_t size) { mem_manager_->FreeMemFromMemPool(device_address_swap_out); } } - device_ptr = mem_manager_->MallocMemFromMemPool(size); if (!device_ptr) { return nullptr; @@ -377,40 +450,11 @@ bool GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod bool GPUKernelRuntime::AllocKernelInputDynamicRes(const mindspore::AnfNodePtr &kernel, AddressPtrList *kernel_inputs) { MS_EXCEPTION_IF_NULL(kernel); MS_EXCEPTION_IF_NULL(kernel_inputs); - MS_EXCEPTION_IF_NULL(mem_swap_manager_); for (size_t i = 0; i < 
AnfAlgo::GetInputTensorNum(kernel); ++i) { // Graph may be all nop nodes and not remove nop node, so this can not skip nop node. auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, i, false); MS_EXCEPTION_IF_NULL(device_address); - if (mem_swap_manager_->trigger_swap()) { - while (auto device_address_swap_in = mem_swap_manager_->UpdateSwapQueue(SwapKind::kHostToDevice)) { - device_address_swap_in->set_status(DeviceAddressStatus::kInDevice); - } - - auto status = device_address->status(); - switch (status) { - case DeviceAddressStatus::kInDevice: - break; - case DeviceAddressStatus::kInHost: - break; - case DeviceAddressStatus::kInDeviceToHost: { - mem_swap_manager_->InsertSwapInBlackList(device_address->ptr_); - device_address->set_status(DeviceAddressStatus::kInDevice); - break; - } - case DeviceAddressStatus::kInHostToDevice: { - while (device_address->status() != DeviceAddressStatus::kInDevice) { - while (auto device_address_swap_in = mem_swap_manager_->UpdateSwapQueue(SwapKind::kHostToDevice)) { - device_address_swap_in->set_status(DeviceAddressStatus::kInDevice); - } - } - break; - } - default: - MS_LOG(ERROR) << "Invaild device address status"; - return false; - } - } + UpdateHostSwapQueue(device_address); MS_EXCEPTION_IF_NULL(device_address->ptr_); kernel::AddressPtr input = std::make_shared(); MS_EXCEPTION_IF_NULL(input); @@ -426,16 +470,7 @@ bool GPUKernelRuntime::AllocKernelOutputDynamicRes(const mindspore::kernel::Kern AddressPtrList *kernel_outputs) { MS_EXCEPTION_IF_NULL(kernel); MS_EXCEPTION_IF_NULL(kernel_outputs); - MS_EXCEPTION_IF_NULL(mem_manager_); - MS_EXCEPTION_IF_NULL(mem_swap_manager_); - if (mem_swap_manager_->trigger_swap()) { - while (auto device_address_swap_out = mem_swap_manager_->UpdateSwapQueue(SwapKind::kDeviceToHost)) { - if (!mem_swap_manager_->FindInSwapInBlackList(device_address_swap_out->ptr_) && device_address_swap_out->ptr_) { - device_address_swap_out->set_status(DeviceAddressStatus::kInHost); - 
mem_manager_->FreeMemFromMemPool(device_address_swap_out); - } - } - } + UpdateDeviceSwapQueue(); auto output_sizes = kernel_mod.GetOutputSizeList(); for (size_t i = 0; i < output_sizes.size(); ++i) { auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, i, false); diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.h similarity index 84% rename from mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h rename to mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.h index ea3ab171606..2b1f8198ce1 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.h @@ -22,9 +22,9 @@ #include #include #include -#include "device/kernel_runtime.h" -#include "device/kernel_runtime_manager.h" -#include "pre_activate/mem_reuse/mem_swap_manager.h" +#include "runtime/device/kernel_runtime.h" +#include "runtime/device/kernel_runtime_manager.h" +#include "backend/optimizer/mem_reuse/mem_swap_manager.h" namespace mindspore { namespace device { @@ -53,9 +53,9 @@ class GPUKernelRuntime : public KernelRuntime { // The related functions and members for using dynamic memory pool. 
void InitKernelRefCount(const session::KernelGraph *graph); void InitKernelOutputAddress(const session::KernelGraph *graph); + void InitMemorySwapInfo(const session::KernelGraph *graph); void ClearKernelOutputAddress(const session::KernelGraph *graph); bool LaunchKernelDynamic(const session::KernelGraph *graph); - bool AddMemSwapTask(const AnfNodePtr &kernel); bool AttemptMallocMem(const DeviceAddressPtr &device_address, size_t size); void *AttemptMallocMem(size_t size); bool AllocKernelDynamicRes(const mindspore::kernel::KernelMod &kernel_mod, const mindspore::AnfNodePtr &kernel, @@ -74,8 +74,14 @@ class GPUKernelRuntime : public KernelRuntime { std::vector size_list); void FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, const AddressPtrList &kernel_workspaces, uint32_t graph_id); + bool AddMemorySwapTask(const AnfNodePtr &kernel); + bool UpdateMemorySwapInfo(const session::KernelGraph *graph); + bool UpdateMemorySwapTask(const AnfNodePtr &kernel); + void UpdateHostSwapQueue(const DeviceAddressPtr device_address); + void UpdateDeviceSwapQueue(); + void ClearSwapQueue(); std::unordered_map mem_reuse_util_map_; - std::unordered_map mem_swap_map_; + std::unordered_map mem_swap_map_; MemSwapManagerPtr mem_swap_manager_{nullptr}; }; MS_REG_KERNEL_RUNTIME(kGPUDevice, GPUKernelRuntime); diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_allocator.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_allocator.cc similarity index 95% rename from mindspore/ccsrc/device/gpu/gpu_memory_allocator.cc rename to mindspore/ccsrc/runtime/device/gpu/gpu_memory_allocator.cc index 91379456619..e2395bbaf25 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_allocator.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_allocator.cc @@ -15,8 +15,8 @@ */ #include -#include "device/gpu/gpu_memory_allocator.h" -#include "device/gpu/cuda_driver.h" +#include "runtime/device/gpu/gpu_memory_allocator.h" +#include "runtime/device/gpu/cuda_driver.h" #include "utils/log_adapter.h" 
#include "utils/context/ms_context.h" #include "utils/convert_utils_base.h" diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_allocator.h b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_allocator.h similarity index 91% rename from mindspore/ccsrc/device/gpu/gpu_memory_allocator.h rename to mindspore/ccsrc/runtime/device/gpu/gpu_memory_allocator.h index 90d77910574..4b6eaa4e14b 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_allocator.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_allocator.h @@ -18,8 +18,8 @@ #define MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_ALLOCATOR_H_ #include -#include "device/gpu/cuda_driver.h" -#include "pre_activate/mem_reuse/mem_dynamic_allocator.h" +#include "runtime/device/gpu/cuda_driver.h" +#include "backend/optimizer/mem_reuse/mem_dynamic_allocator.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_copy_manager.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_copy_manager.cc similarity index 96% rename from mindspore/ccsrc/device/gpu/gpu_memory_copy_manager.cc rename to mindspore/ccsrc/runtime/device/gpu/gpu_memory_copy_manager.cc index 80206f309d8..0406c0f151b 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_copy_manager.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_copy_manager.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "device/gpu/gpu_memory_copy_manager.h" -#include "device/gpu/gpu_common.h" -#include "device/gpu/gpu_device_manager.h" -#include "session/anf_runtime_algorithm.h" +#include "runtime/device/gpu/gpu_memory_copy_manager.h" +#include "runtime/device/gpu/gpu_common.h" +#include "runtime/device/gpu/gpu_device_manager.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_copy_manager.h b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_copy_manager.h similarity index 91% rename from mindspore/ccsrc/device/gpu/gpu_memory_copy_manager.h rename to mindspore/ccsrc/runtime/device/gpu/gpu_memory_copy_manager.h index 36ff273015f..dc99b7f7d0b 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_copy_manager.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_copy_manager.h @@ -20,10 +20,10 @@ #include #include #include -#include "pre_activate/mem_reuse/mem_copy_manager.h" -#include "device/device_address.h" -#include "device/gpu/cuda_driver.h" -#include "kernel/kernel.h" +#include "backend/optimizer/mem_reuse/mem_copy_manager.h" +#include "runtime/device/device_address.h" +#include "runtime/device/gpu/cuda_driver.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.cc similarity index 97% rename from mindspore/ccsrc/device/gpu/gpu_memory_manager.cc rename to mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.cc index 9a63921adda..ffa07eea0d8 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "device/gpu/gpu_memory_manager.h" -#include "device/gpu/gpu_memory_allocator.h" +#include "runtime/device/gpu/gpu_memory_manager.h" +#include "runtime/device/gpu/gpu_memory_allocator.h" #include "utils/context/ms_context.h" #include "utils/convert_utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_manager.h b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.h similarity index 97% rename from mindspore/ccsrc/device/gpu/gpu_memory_manager.h rename to mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.h index c79fb9cc22e..533116cefce 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_manager.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.h @@ -17,7 +17,7 @@ #ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_MANAGER_H_ #define MINDSPORE_MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_MANAGER_H_ #include -#include "device/memory_manager.h" +#include "runtime/device/memory_manager.h" namespace mindspore { namespace device { namespace gpu { diff --git a/mindspore/ccsrc/device/gpu/gpu_stream_assign.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc similarity index 97% rename from mindspore/ccsrc/device/gpu/gpu_stream_assign.cc rename to mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc index 42cdcf29ecf..78915f10d7d 100644 --- a/mindspore/ccsrc/device/gpu/gpu_stream_assign.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc @@ -14,14 +14,14 @@ * limitations under the License. 
*/ -#include "device/gpu/gpu_stream_assign.h" +#include "runtime/device/gpu/gpu_stream_assign.h" #include #include #include #include -#include "device/gpu/gpu_common.h" -#include "device/gpu/kernel_info_setter.h" -#include "device/gpu/gpu_device_manager.h" +#include "runtime/device/gpu/gpu_common.h" +#include "runtime/device/gpu/kernel_info_setter.h" +#include "runtime/device/gpu/gpu_device_manager.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/gpu/gpu_stream_assign.h b/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.h similarity index 97% rename from mindspore/ccsrc/device/gpu/gpu_stream_assign.h rename to mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.h index f8041878b27..f22ce8fe386 100644 --- a/mindspore/ccsrc/device/gpu/gpu_stream_assign.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.h @@ -20,8 +20,8 @@ #include #include #include -#include "session/kernel_graph.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/gpu/kernel_info_setter.cc b/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc similarity index 94% rename from mindspore/ccsrc/device/gpu/kernel_info_setter.cc rename to mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc index 42e76e2483c..4326987784c 100644 --- a/mindspore/ccsrc/device/gpu/kernel_info_setter.cc +++ b/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc @@ -14,18 +14,18 @@ * limitations under the License. 
*/ -#include "device/gpu/kernel_info_setter.h" +#include "runtime/device/gpu/kernel_info_setter.h" #include #include -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" #include "utils/utils.h" -#include "kernel/gpu/gpu_kernel_factory.h" -#include "kernel/kernel_build_info.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/common_utils.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/common_utils.h" #include "common/utils.h" -#include "kernel/oplib/oplib.h" -#include "kernel/oplib/opinfo.h" +#include "backend/kernel_compiler/oplib/oplib.h" +#include "backend/kernel_compiler/oplib/opinfo.h" namespace mindspore { namespace device { @@ -88,10 +88,11 @@ std::string SupportedTypeList(const CNodePtr &kernel_node) { supported_akg_type_list = supported_akg_type_list + mindspore::kernel::TypeId2String(type); } supported_type_lists = supported_type_lists + supported_akg_type_list + "], out["; + supported_akg_type_list.clear(); for (auto type : supported_akg_type_out) { supported_akg_type_list = supported_akg_type_list + mindspore::kernel::TypeId2String(type); } - supported_type_lists += "]; "; + supported_type_lists = supported_type_lists + supported_akg_type_list + "]; "; } return supported_type_lists; } diff --git a/mindspore/ccsrc/device/gpu/kernel_info_setter.h b/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.h similarity index 100% rename from mindspore/ccsrc/device/gpu/kernel_info_setter.h rename to mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.h diff --git a/mindspore/ccsrc/device/gpu/mpi/mpi_initializer.cc b/mindspore/ccsrc/runtime/device/gpu/mpi/mpi_initializer.cc similarity index 97% rename from mindspore/ccsrc/device/gpu/mpi/mpi_initializer.cc rename to mindspore/ccsrc/runtime/device/gpu/mpi/mpi_initializer.cc index bcad74e5b5e..4605a0eb4e3 100644 --- 
a/mindspore/ccsrc/device/gpu/mpi/mpi_initializer.cc +++ b/mindspore/ccsrc/runtime/device/gpu/mpi/mpi_initializer.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "device/gpu/mpi/mpi_initializer.h" +#include "runtime/device/gpu/mpi/mpi_initializer.h" #include #include diff --git a/mindspore/ccsrc/device/gpu/mpi/mpi_initializer.h b/mindspore/ccsrc/runtime/device/gpu/mpi/mpi_initializer.h similarity index 100% rename from mindspore/ccsrc/device/gpu/mpi/mpi_initializer.h rename to mindspore/ccsrc/runtime/device/gpu/mpi/mpi_initializer.h diff --git a/mindspore/ccsrc/device/gpu/readme.md b/mindspore/ccsrc/runtime/device/gpu/readme.md similarity index 100% rename from mindspore/ccsrc/device/gpu/readme.md rename to mindspore/ccsrc/runtime/device/gpu/readme.md diff --git a/mindspore/ccsrc/device/kernel_adjust.cc b/mindspore/ccsrc/runtime/device/kernel_adjust.cc similarity index 96% rename from mindspore/ccsrc/device/kernel_adjust.cc rename to mindspore/ccsrc/runtime/device/kernel_adjust.cc index fd0a8eb967b..bb1f7f723ee 100644 --- a/mindspore/ccsrc/device/kernel_adjust.cc +++ b/mindspore/ccsrc/runtime/device/kernel_adjust.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "device/kernel_adjust.h" +#include "runtime/device/kernel_adjust.h" #include #include @@ -23,17 +23,18 @@ #include #include -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/context/ms_context.h" #include "common/trans.h" #include "utils/config_manager.h" #include "common/utils.h" -#include "kernel/kernel_build_info.h" +#include "backend/kernel_compiler/kernel_build_info.h" #include "utils/utils.h" -#include "device/ascend/profiling/profiling_manager.h" -#include "device/ascend/kernel_select_ascend.h" +#include "runtime/device/ascend/profiling/profiling_manager.h" +#include "runtime/device/ascend/kernel_select_ascend.h" #include "runtime/base.h" -#include "device/ascend/ascend_stream_assign.h" +#include "runtime/device/ascend/ascend_stream_assign.h" + namespace mindspore { namespace device { using device::ascend::ProfilingUtils; @@ -117,6 +118,7 @@ void KernelAdjust::InsertSwitchLoop(const std::shared_ptr std::vector *mute_inputs = kernel_graph_ptr->MutableInputs(); MS_EXCEPTION_IF_NULL(mute_inputs); mute_inputs->push_back(switch_loop_input[kLoopCountParamName]); + mute_inputs->push_back(switch_loop_input[kEpochParamName]); mute_inputs->push_back(switch_loop_input[kIterLoopParamName]); mute_inputs->push_back(switch_loop_input[kZeroParamName]); mute_inputs->push_back(switch_loop_input[kOneParamName]); @@ -316,6 +318,13 @@ void KernelAdjust::CreateSwitchOpParameters(const std::shared_ptrset_abstract(paremeter_abstract_ptr); ParameterPtr one_new = kernel_graph_ptr->NewParameter(one); (*switch_loop_input)[kOneParamName] = one_new; + + ParameterPtr epoch = std::make_shared(kernel_graph_ptr); + MS_EXCEPTION_IF_NULL(epoch); + epoch->set_name(kEpochParamName); + epoch->set_abstract(paremeter_abstract_ptr); + ParameterPtr epoch_new = kernel_graph_ptr->NewParameter(epoch); + (*switch_loop_input)[kEpochParamName] = epoch_new; } kernel::KernelBuildInfo::KernelBuildInfoBuilder 
KernelAdjust::CreateMngKernelBuilder( @@ -510,6 +519,14 @@ void KernelAdjust::LoadSwitchInputs(std::vector *inputs) { *val = 0; inputs->push_back(loop_count_tensor); + // Epoch in device + tensor::TensorPtr epoch_tensor = std::make_shared(kInt32->type_id(), shp); + MS_EXCEPTION_IF_NULL(epoch_tensor); + val = static_cast(epoch_tensor->data_c()); + MS_EXCEPTION_IF_NULL(val); + *val = 0; + inputs->push_back(epoch_tensor); + tensor::TensorPtr iter_loop_tensor = std::make_shared(kInt32->type_id(), shp); MS_EXCEPTION_IF_NULL(iter_loop_tensor); val = static_cast(iter_loop_tensor->data_c()); @@ -531,6 +548,7 @@ void KernelAdjust::LoadSwitchInputs(std::vector *inputs) { MS_EXCEPTION_IF_NULL(val); *val = 1; inputs->push_back(one_tensor); + MS_LOG(INFO) << "---------------- LoadSwitchInputs End--"; } diff --git a/mindspore/ccsrc/device/kernel_adjust.h b/mindspore/ccsrc/runtime/device/kernel_adjust.h similarity index 92% rename from mindspore/ccsrc/device/kernel_adjust.h rename to mindspore/ccsrc/runtime/device/kernel_adjust.h index bf3ba2acb25..dbd6f226af7 100644 --- a/mindspore/ccsrc/device/kernel_adjust.h +++ b/mindspore/ccsrc/runtime/device/kernel_adjust.h @@ -23,12 +23,12 @@ #include #include #include "ir/anf.h" -#include "session/kernel_graph.h" -#include "kernel/kernel_build_info.h" -#include "session/session_context.h" +#include "backend/session/kernel_graph.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/session/session_context.h" #include "ir/tensor.h" -#include "device/ascend/profiling/profiling_utils.h" -#include "device/kernel_info.h" +#include "runtime/device/ascend/profiling/profiling_utils.h" +#include "runtime/device/kernel_info.h" using mindspore::device::ascend::ProfilingTraceInfo; using mindspore::device::ascend::ProfilingUtils; @@ -37,6 +37,7 @@ constexpr auto kLoopCountParamName = "loop_count"; constexpr auto kIterLoopParamName = "iter_loop"; constexpr auto kZeroParamName = "zero"; constexpr auto kOneParamName = "one"; 
+constexpr auto kEpochParamName = "loop_epoch"; constexpr auto kStreamNeedActivedFirst = "stream_need_active_first"; constexpr uint32_t kSecondStreamSwitchLabel = 2; diff --git a/mindspore/ccsrc/device/kernel_info.cc b/mindspore/ccsrc/runtime/device/kernel_info.cc similarity index 99% rename from mindspore/ccsrc/device/kernel_info.cc rename to mindspore/ccsrc/runtime/device/kernel_info.cc index 59c9b0f4111..692532e70b3 100644 --- a/mindspore/ccsrc/device/kernel_info.cc +++ b/mindspore/ccsrc/runtime/device/kernel_info.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "device/kernel_info.h" +#include "runtime/device/kernel_info.h" namespace mindspore { namespace device { diff --git a/mindspore/ccsrc/device/kernel_info.h b/mindspore/ccsrc/runtime/device/kernel_info.h similarity index 91% rename from mindspore/ccsrc/device/kernel_info.h rename to mindspore/ccsrc/runtime/device/kernel_info.h index 84cfaa0fa31..baded9d9a3a 100644 --- a/mindspore/ccsrc/device/kernel_info.h +++ b/mindspore/ccsrc/runtime/device/kernel_info.h @@ -19,15 +19,16 @@ #include #include -#include "kernel/kernel_build_info.h" -#include "device/ascend/ascend_device_address.h" -#include "kernel/kernel.h" +#include "ir/kernel_info_dev.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "runtime/device/ascend/ascend_device_address.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { const uint32_t kInvalidGraphId = UINT32_MAX; const uint32_t kInvalidDistincLabel = UINT32_MAX; namespace device { -class KernelInfo { +class KernelInfo : public KernelInfoDevice { public: KernelInfo() { kernel_mod_ = nullptr; @@ -41,6 +42,7 @@ class KernelInfo { } virtual ~KernelInfo() = default; + bool has_build_info() const override { return select_kernel_build_info() != nullptr; } const kernel::KernelBuildInfo *select_kernel_build_info() const; kernel::KernelBuildInfoPtr GetMutableSelectKernelBuildInfo() const; void set_select_kernel_build_info(const 
kernel::KernelBuildInfoPtr &select_kernel_build_info) { diff --git a/mindspore/ccsrc/device/kernel_runtime.cc b/mindspore/ccsrc/runtime/device/kernel_runtime.cc similarity index 96% rename from mindspore/ccsrc/device/kernel_runtime.cc rename to mindspore/ccsrc/runtime/device/kernel_runtime.cc index 27cf1dfc923..3de9af8c232 100644 --- a/mindspore/ccsrc/device/kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/kernel_runtime.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "device/kernel_runtime.h" +#include "runtime/device/kernel_runtime.h" #include #include #include @@ -23,12 +23,12 @@ #include "common/trans.h" #include "utils/utils.h" #include "utils/context/ms_context.h" -#include "operator/ops.h" -#include "pipeline/parse/python_adapter.h" -#include "session/kernel_graph.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/common_utils.h" -#include "kernel/oplib/oplib.h" +#include "frontend/operator/ops.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/common_utils.h" +#include "backend/kernel_compiler/oplib/oplib.h" #include "ir/value.h" using mindspore::kernel::Address; using mindspore::kernel::AddressPtr; @@ -214,8 +214,10 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector auto output_size = AnfAlgo::GetOutputTensorNum(item); for (size_t index = 0; index < output_size; index++) { MS_EXCEPTION_IF_NULL(input_tensors[input_index]); - if (input_tensors[input_index]->device_address().get() != nullptr) { - AnfAlgo::SetOutputAddr(input_tensors[input_index]->device_address(), index, item.get()); + auto output_address = + std::dynamic_pointer_cast(input_tensors[input_index]->device_address()); + if (output_address != nullptr) { + AnfAlgo::SetOutputAddr(output_address, index, item.get()); continue; } TypeId output_type_id = AnfAlgo::GetOutputDeviceDataType(item, index); @@ -292,6 +294,7 @@ void 
KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(mem_manager_); auto graph_inputs = graph->inputs(); auto graph_valid_input = graph->valid_inputs(); + graph_inputs.insert(graph_inputs.end(), graph->child_graph_result().begin(), graph->child_graph_result().end()); std::vector need_alloc_nodes; for (size_t i = 0; i < graph_inputs.size(); ++i) { auto item = graph_inputs[i]; @@ -431,6 +434,10 @@ void KernelRuntime::AssignCommunicationNodeOutputMem(int flag, const AnfNodePtr std::string output_format = AnfAlgo::GetOutputFormat(node, j); auto output_type = AnfAlgo::GetOutputDeviceDataType(node, j); auto address = CreateDeviceAddress(output_ptr, output_sizes[j], output_format, output_type); + MS_EXCEPTION_IF_NULL(address); + if (AnfAlgo::IsCommunicationOp(node) && context_ptr->enable_hccl()) { + address->UpdateCommunicationAddress(); + } AnfAlgo::SetOutputAddr(address, j, node.get()); output_ptr += align_size_list[j]; } @@ -480,6 +487,8 @@ void KernelRuntime::AssignCommunicationNodeInputMem(const AnfNodePtr &node) { } void KernelRuntime::AssignNodeOutputMem(int flag, const AnfNodePtr &node, int index) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); MS_EXCEPTION_IF_NULL(node); MS_EXCEPTION_IF_NULL(mem_manager_); if (AnfAlgo::IsGetNext(NOT_NULL(node)) && flag == kReuseDynamicMem) { @@ -509,7 +518,11 @@ void KernelRuntime::AssignNodeOutputMem(int flag, const AnfNodePtr &node, int in std::string output_format = AnfAlgo::GetOutputFormat(node, i); auto output_type = AnfAlgo::GetOutputDeviceDataType(node, i); auto device_address = CreateDeviceAddress(ptr, output_sizes[i], output_format, output_type); + MS_EXCEPTION_IF_NULL(device_address); device_address->set_host_shape(trans::GetRuntimePaddingShape(node, i)); + if (AnfAlgo::IsCommunicationOp(node) && context_ptr->enable_hccl()) { + device_address->UpdateCommunicationAddress(); + } AnfAlgo::SetOutputAddr(device_address, i, node.get()); } } diff 
--git a/mindspore/ccsrc/device/kernel_runtime.h b/mindspore/ccsrc/runtime/device/kernel_runtime.h similarity index 95% rename from mindspore/ccsrc/device/kernel_runtime.h rename to mindspore/ccsrc/runtime/device/kernel_runtime.h index 8c6a5eb19bf..8320355b82a 100644 --- a/mindspore/ccsrc/device/kernel_runtime.h +++ b/mindspore/ccsrc/runtime/device/kernel_runtime.h @@ -21,7 +21,7 @@ #include #include -#include "device/device_address.h" +#include "runtime/device/device_address.h" #include "ir/tensor.h" #include "predict/generator/utils/ir_model_util.h" #ifdef ENABLE_DUMP_E2E @@ -30,11 +30,11 @@ #ifdef ENABLE_DEBUGGER #include "debug/debugger/debugger.h" #endif -#include "session/kernel_graph.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/kernel.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/kernel.h" #include "utils/context/ms_context.h" -#include "device/memory_manager.h" +#include "runtime/device/memory_manager.h" using mindspore::tensor::Tensor; using std::vector; diff --git a/mindspore/ccsrc/device/kernel_runtime_manager.cc b/mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc similarity index 98% rename from mindspore/ccsrc/device/kernel_runtime_manager.cc rename to mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc index 29d74762b4a..626259f9ce2 100644 --- a/mindspore/ccsrc/device/kernel_runtime_manager.cc +++ b/mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "device/kernel_runtime_manager.h" +#include "runtime/device/kernel_runtime_manager.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/mindspore/ccsrc/device/kernel_runtime_manager.h b/mindspore/ccsrc/runtime/device/kernel_runtime_manager.h similarity index 98% rename from mindspore/ccsrc/device/kernel_runtime_manager.h rename to mindspore/ccsrc/runtime/device/kernel_runtime_manager.h index 89b45ff5f86..7fcb40ae671 100644 --- a/mindspore/ccsrc/device/kernel_runtime_manager.h +++ b/mindspore/ccsrc/runtime/device/kernel_runtime_manager.h @@ -23,7 +23,7 @@ #include #include #include "common/utils.h" -#include "device/kernel_runtime.h" +#include "runtime/device/kernel_runtime.h" namespace mindspore { namespace device { using KernelRuntimeCreator = std::function()>; diff --git a/mindspore/ccsrc/device/memory_manager.cc b/mindspore/ccsrc/runtime/device/memory_manager.cc similarity index 91% rename from mindspore/ccsrc/device/memory_manager.cc rename to mindspore/ccsrc/runtime/device/memory_manager.cc index 5efbcd8a369..563d5f0f501 100644 --- a/mindspore/ccsrc/device/memory_manager.cc +++ b/mindspore/ccsrc/runtime/device/memory_manager.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "device/memory_manager.h" -#include "session/anf_runtime_algorithm.h" +#include "runtime/device/memory_manager.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/context/ms_context.h" using mindspore::memreuse::BestFitMemReuse; using mindspore::memreuse::MemReuseUtilPtr; @@ -99,6 +99,11 @@ uint8_t *MemoryManager::MallocStaticMem(size_t size, bool communication_mem) { } else { align_size = GetCommonAlignSize(size); } + + MS_LOG(INFO) << "Malloc Memory for Static: total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ + << "] static[" << total_static_size_ << "])" + << " malloc [" << align_size << "] communication_mem: " << communication_mem; + if (static_mem_offset_ < align_size) { MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ << "] static[" << total_static_size_ << "])" @@ -126,6 +131,11 @@ uint8_t *MemoryManager::MallocDynamicMem(size_t size, bool communication_mem) { } else { align_size = GetCommonAlignSize(size); } + + MS_LOG(INFO) << "Malloc Memory for Dynamic: total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ + << "] static[" << total_static_size_ << "])" + << " malloc [" << align_size << "] communication_mem: " << communication_mem; + uint64_t offset = dynamic_mem_offset_; auto new_offset = dynamic_mem_offset_ + align_size; if (new_offset > static_mem_offset_) { diff --git a/mindspore/ccsrc/device/memory_manager.h b/mindspore/ccsrc/runtime/device/memory_manager.h similarity index 94% rename from mindspore/ccsrc/device/memory_manager.h rename to mindspore/ccsrc/runtime/device/memory_manager.h index be250e0f3f3..3c6fb1b39a4 100644 --- a/mindspore/ccsrc/device/memory_manager.h +++ b/mindspore/ccsrc/runtime/device/memory_manager.h @@ -18,8 +18,8 @@ #define MINDSPORE_MINDSPORE_CCSRC_DEVICE_MEMORY_MANAGER_H_ #include #include -#include "pre_activate/mem_reuse/mem_reuse.h" -#include "pre_activate/mem_reuse/mem_reuse_allocator.h" +#include 
"backend/optimizer/mem_reuse/mem_reuse.h" +#include "backend/optimizer/mem_reuse/mem_reuse_allocator.h" namespace mindspore { namespace device { const int kStaticMem = 0; @@ -36,7 +36,7 @@ class MemoryManager { virtual void MallocDeviceMemory() = 0; virtual void FreeDeviceMemory() = 0; - void ResetDynamicMemory() { + virtual void ResetDynamicMemory() { total_dynamic_size_ = 0; dynamic_mem_offset_ = 0; } diff --git a/mindspore/ccsrc/transform/CMakeLists.txt b/mindspore/ccsrc/transform/graph_ir/CMakeLists.txt similarity index 61% rename from mindspore/ccsrc/transform/CMakeLists.txt rename to mindspore/ccsrc/transform/graph_ir/CMakeLists.txt index c783cc00601..3f062609d5d 100644 --- a/mindspore/ccsrc/transform/CMakeLists.txt +++ b/mindspore/ccsrc/transform/graph_ir/CMakeLists.txt @@ -1,9 +1,9 @@ if (ENABLE_GE OR ENABLE_D) file(GLOB_RECURSE _TRANSFORM_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_TRANSFORM_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_GE_ADPT) - add_library(_mindspore_transform_obj OBJECT ${_TRANSFORM_SRC_LIST}) + add_library(_mindspore_transform_graph_ir_obj OBJECT ${_TRANSFORM_SRC_LIST}) if (NOT ENABLE_GE) - target_compile_definitions(_mindspore_transform_obj PRIVATE NO_GE_CLIENT) + target_compile_definitions(_mindspore_transform_graph_ir_obj PRIVATE NO_GE_CLIENT) endif() endif () diff --git a/mindspore/ccsrc/transform/all_ops.h b/mindspore/ccsrc/transform/graph_ir/all_ops.h similarity index 100% rename from mindspore/ccsrc/transform/all_ops.h rename to mindspore/ccsrc/transform/graph_ir/all_ops.h diff --git a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/graph_ir/convert.cc similarity index 92% rename from mindspore/ccsrc/transform/convert.cc rename to mindspore/ccsrc/transform/graph_ir/convert.cc index f88e31fcd2f..7419dd2cc98 100644 --- a/mindspore/ccsrc/transform/convert.cc +++ b/mindspore/ccsrc/transform/graph_ir/convert.cc @@ -14,20 +14,21 @@ * limitations under 
the License. */ -#include "transform/convert.h" +#include "transform/graph_ir/convert.h" #include #include #include #include "utils/utils.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "utils/log_adapter.h" #include "utils/graph_utils.h" #include "utils/symbolic.h" #include "utils/config_manager.h" #include "utils/convert_utils.h" #include "./common.h" +#include "utils/context/ms_context.h" namespace mindspore { namespace transform { @@ -206,6 +207,7 @@ const char kNameRange[] = "Range"; const char kNameSquareSumAll[] = "SquareSumAll"; const char kNameAscendQuant[] = "AscendQuant"; const char kNameAscendDequant[] = "AscendDequant"; +const char kNameCase[] = "Case"; // -----------------OpAdapter initialization-------------- std::unordered_map &DfGraphConvertor::get_adpt_map() { @@ -378,7 +380,7 @@ std::unordered_map &DfGraphConvertor::get_adpt_ma {string(kNameBiasAdd), ADPT_DESC(BiasAdd)}, {prim::kPrimRelu->name(), ADPT_DESC(Relu)}, - {prim::kPrimMatMul->name(), ADPT_DESC(MatMul)}, + {prim::kPrimMatMul->name(), ADPT_DESC(MatMulV2)}, {string(kNameConst), ADPT_DESC(Constant, Const)}, {string(kNameSoftmax), ADPT_DESC(SoftmaxV2)}, @@ -413,7 +415,8 @@ std::unordered_map &DfGraphConvertor::get_adpt_ma {string(kNameRange), ADPT_DESC(RangeD)}, {string(kNameSquareSumAll), ADPT_DESC(SquareSumAll)}, {string(kNameAscendQuant), ADPT_DESC(AscendQuant)}, - {string(kNameAscendDequant), ADPT_DESC(AscendDequant)}}; + {string(kNameAscendDequant), ADPT_DESC(AscendDequant)}, + {string(kNameCase), ADPT_DESC(Case)}}; #ifdef ENABLE_GE adpt_map[string(kNamePrint)] = ADPT_DESC(Print); adpt_map[string(kNameApplyAdam)] = ADPT_DESC(ApplyAdamD); @@ -435,13 +438,32 @@ PrimType GetCNodeFuncType(const CNodePtr cnode) { return kPrimTypeUnknown; } +bool IsCaseNode(const CNodePtr node) { + if (!node->inputs().empty() && node->input(0)->isa() && + GetCNodeFuncName(node->input(0)->cast()) == "switch_layer") { + return true; + } + return false; +} + +std::string 
GetCNodeTargetFuncName(const CNodePtr cnode) { + if (IsCaseNode(cnode)) { + return string(kNameCase); + } + auto name = GetCNodeFuncName(cnode); + if (name == "switch_layer") { + name = ""; + } + return name; +} + OpAdapterPtr DfGraphConvertor::FindAdapter(const AnfNodePtr node, bool train) { if (node->isa()) { auto cnode = node->cast(); std::string name = kNameCustomOp; if (!IsCustomCNode(cnode)) { - name = GetCNodeFuncName(cnode); + name = GetCNodeTargetFuncName(cnode); } auto it_adpt = get_adpt_map().find(name); @@ -959,7 +981,7 @@ void DfGraphConvertor::TraceOutput(const AnfNodePtr node) { auto c = anf_out->cast(); std::string name = ""; if (anf_out->isa()) { - name = GetCNodeFuncName(c); + name = GetCNodeTargetFuncName(c); } if (name == "make_tuple") { @@ -1031,6 +1053,99 @@ void SetupDatasetIterGetNextNode(const OperatorPtr &op) { return; } +void DfGraphConvertor::SetSubgraph(AnfNodePtr node) { + if (!node->isa()) { + return; + } + auto cnode = node->cast(); + if (!IsCaseNode(cnode)) { + return; + } + std::vector case_inputs; + for (size_t i = 1; i < cnode->inputs().size(); i++) { + case_inputs.emplace_back(cnode->input(i)); + } + std::shared_ptr> branches = std::make_shared>(); + auto bnode = cnode->input(0)->cast()->input(2)->cast(); + + for (size_t i = 1; i < bnode->inputs().size(); i++) { + auto branch_node = bnode->input(i)->cast(); + for (size_t j = 2; j < branch_node->inputs().size(); j++) { + if (std::find(case_inputs.begin(), case_inputs.end(), branch_node->input(j)) == case_inputs.end()) { + case_inputs.emplace_back(branch_node->input(j)); + } + } + } + + for (size_t i = 1; i < bnode->inputs().size(); i++) { + ProcessSubgraph(bnode->input(i), case_inputs); + } + + for (size_t i = 1; i < bnode->inputs().size(); i++) { + branches->emplace_back(branches_map_[bnode->input(i).get()]); + } + + if (op_cache_.find(node.get()) == op_cache_.end()) { + return; + } + + OpAdapterPtr adpt = FindAdapter(node, training_); + if (nullptr == adpt) { + MS_LOG(DEBUG) << 
"Not found adapter"; + return; + } + + OperatorPtr op = Convert(node); + adpt->setSubgraph(op, 0, branches); + return; +} + +void DfGraphConvertor::GetCaseNodeInput(const CNodePtr node, const CNodePtr input_node) { + std::vector case_inputs; + for (size_t i = 1; i < node->inputs().size(); i++) { + case_inputs.emplace_back(node->input(i)); + } + std::shared_ptr> branches = std::make_shared>(); + auto bnode = input_node->input(2)->cast(); + + for (size_t i = 1; i < bnode->inputs().size(); i++) { + auto branch_node = bnode->input(i)->cast(); + for (size_t j = 2; j < branch_node->inputs().size(); j++) { + if (std::find(case_inputs.begin(), case_inputs.end(), branch_node->input(j)) == case_inputs.end()) { + case_inputs.emplace_back(branch_node->input(j)); + } + } + } + + const size_t case_index = 1; + const size_t make_tuple_index = 2; + + AnfNodePtr case_index_iter = input_node->input(case_index); + AnfNodePtr make_tuple_iter = input_node->input(make_tuple_index); + auto make_tuple_node = make_tuple_iter->cast(); + std::shared_ptr> tuple_items = std::make_shared>(); + + for (size_t i = 0; i < case_inputs.size(); i++) { + auto item = case_inputs[i]; + auto op = Convert(item); + if (op != nullptr) { + tuple_items->emplace_back(OutHandler(op, "")); + } else if (out_handle_cache_.find(item.get()) != out_handle_cache_.end()) { + tuple_items->push_back(out_handle_cache_[item.get()]); + } else { + MS_LOG(WARNING) << "This anf node is not supported as a case input: " << item->ToString(); + continue; + } + } + + tuple_out_handle_cache_[make_tuple_node.get()] = tuple_items; + + std::shared_ptr> case_input_items = std::make_shared>(); + case_input_items->emplace_back(case_index_iter); + case_input_items->emplace_back(make_tuple_iter); + case_input_handle_cache_[node.get()] = case_input_items; +} + DfGraphConvertor &DfGraphConvertor::BuildGraph() { SetupDatasetIterGetNextNode(dataset_iter_getnext_); @@ -1038,6 +1153,16 @@ DfGraphConvertor &DfGraphConvertor::BuildGraph() { return 
*this; } + // Case node set input. + std::vector nodes = ::mindspore::TopoSort(anf_graph_->get_return()); + for (auto &it : nodes) { + if (it->isa() && IsCaseNode(it->cast())) { + auto node = it->cast(); + auto input_node = node->input(0)->cast(); + GetCaseNodeInput(node, input_node); + } + } + // update tuple_out_handle_cache_ for (auto it : tuple_out_handle_cache_) { std::size_t len = it.second->size(); @@ -1058,10 +1183,11 @@ DfGraphConvertor &DfGraphConvertor::BuildGraph() { // set up dependices MS_LOG(DEBUG) << "set up dependices"; - std::vector nodes = ::mindspore::TopoSort(anf_graph_->get_return()); + nodes = ::mindspore::TopoSort(anf_graph_->get_return()); for (auto &it : nodes) { SetNodeInput(it); SetOpControlInput(it); + SetSubgraph(it); UpdateOpDesc(it); } @@ -1077,6 +1203,18 @@ DfGraphConvertor &DfGraphConvertor::BuildGraph() { inputs.push_back(*dataset_iter_getnext_); } else { auto params = anf_graph_->parameters(); + if (use_inputs_) { + params = inputs_; + auto anf_params = anf_graph_->parameters(); + for (size_t i = 0; i < params.size(); i++) { + for (size_t j = 0; j < anf_params.size(); j++) { + if (params[i]->ToString() == anf_params[j]->ToString()) { + params[i] = anf_params[j]; + } + } + } + } + int index = 0; for (auto &it : params) { auto name = std::static_pointer_cast(it)->name(); @@ -1187,10 +1325,21 @@ const std::vector trans_var_list = {string(kNameAssign), string(kNa void DfGraphConvertor::SetOpInput(const OpAdapterPtr &adpt, const CNodePtr &node) { OperatorPtr src = Convert(node); + int case_flag = 0; auto &inputs = node->inputs(); - for (size_t i = 1; i < inputs.size(); i++) { + size_t input_size = inputs.size(); + if (case_input_handle_cache_.find(node.get()) != case_input_handle_cache_.end()) { + case_flag = 1; + input_size = case_input_handle_cache_[node.get()]->size() + 1; + } + + for (size_t i = 1; i < input_size; i++) { auto pred = inputs[i]; - while (pred->isa() && GetCNodeFuncName(pred->cast()) == "Depend") { + if (case_flag != 
0) { + pred = case_input_handle_cache_[node.get()]->at(i - 1); + } + + while (pred->isa() && GetCNodeTargetFuncName(pred->cast()) == "Depend") { pred = pred->cast()->input(1); } // skip the None input @@ -1198,7 +1347,7 @@ void DfGraphConvertor::SetOpInput(const OpAdapterPtr &adpt, const CNodePtr &node continue; } // transform "Const" op to "Variable" op when the next node is "Assign" op. - std::string c_name = GetCNodeFuncName(node); + std::string c_name = GetCNodeTargetFuncName(node); auto pos = std::find(trans_var_list.begin(), trans_var_list.end(), c_name); if (!training_ && pos != trans_var_list.end() && pred->isa()) { std::string name = std::static_pointer_cast(pred)->name(); @@ -1222,7 +1371,7 @@ void DfGraphConvertor::SetOpInput(const OpAdapterPtr &adpt, const CNodePtr &node if (it != out_handle_cache_.end()) { int ret = adpt->setInput(src, SizeToInt(i), it->second); if (ret == 0) { - if (pred->isa() && GetCNodeFuncName(pred->cast()) == "tuple_getitem") { + if (pred->isa() && GetCNodeTargetFuncName(pred->cast()) == "tuple_getitem") { compute_sout_ << op_draw_name_[pred->cast()->input(1).get()] << " -> " << op_draw_name_[node.get()] << ":" << i << endl; } else if (pred->isa()) { @@ -1280,6 +1429,23 @@ void DfGraphConvertor::SetNodeInput(const AnfNodePtr node) { DfGraphConvertor::SetOpInput(adpt, cnode); } +void DfGraphConvertor::ProcessSubgraph(AnfNodePtr node, const std::vector &inputs) { + if (!node->isa() || GetCNodeFuncName(node->cast()) != "Partial") { + return; + } + auto graph_node = node->cast()->input(1)->cast(); + FuncGraphPtr anf_graph = graph_node->value()->cast(); + DfGraphConvertor convertor(anf_graph); + convertor.use_inputs_ = true; + convertor.inputs_ = inputs; + (void)convertor.ConvertAllNode().BuildGraph(); + std::string name = graph_node->ToString() + "_ge_graph.dot"; + if (MsContext::GetInstance()->save_graphs_flag()) { + convertor.DrawComputeGraph(name); + } + branches_map_[node.get()] = *(convertor.df_graph_); +} + // Update GE op's 
shape and type info void DfGraphConvertor::UpdateOpDesc(const AnfNodePtr node) { if (nullptr == node || !node->isa()) { @@ -1350,6 +1516,7 @@ void DfGraphConvertor::ConvertMakeTuple(const CNodePtr node) { } } + MS_LOG(WARNING) << "ConvertMakeTuple: " << node.get() << " " << tuple_items->size(); tuple_out_handle_cache_[node.get()] = tuple_items; } @@ -1713,6 +1880,14 @@ bool DfGraphConvertor::CheckCNode(const std::string &name, const CNodePtr node) return false; } + if (name == "" && GetCNodeFuncName(node) == "switch_layer") { + return false; + } + + if (name == "Partial") { + return false; + } + // make_tuple is used for a dynamic_input, convert it to a vector of OutHandlers if (name == "make_tuple") { ConvertMakeTuple(node); @@ -1734,7 +1909,7 @@ bool DfGraphConvertor::CheckCNode(const std::string &name, const CNodePtr node) } OperatorPtr DfGraphConvertor::ConvertCNode(const CNodePtr node) { - std::string name = GetCNodeFuncName(node); + std::string name = GetCNodeTargetFuncName(node); if (!CheckCNode(name, node)) { return nullptr; } @@ -1881,7 +2056,7 @@ void DfGraphConvertor::DrawCNode(const CNodePtr node, const OpAdapterPtr adpt) { } compute_sout_ << "\"" << node->ToString() - << ":" << GetCNodeFuncName(node) << "\"" << endl; + << ":" << GetCNodeTargetFuncName(node) << "\"" << endl; // print attrs' values auto atts = adpt->GetAttrsFromDrawGraph(); diff --git a/mindspore/ccsrc/transform/convert.h b/mindspore/ccsrc/transform/graph_ir/convert.h similarity index 94% rename from mindspore/ccsrc/transform/convert.h rename to mindspore/ccsrc/transform/graph_ir/convert.h index 2f6c9bb0add..6fa27831bf2 100644 --- a/mindspore/ccsrc/transform/convert.h +++ b/mindspore/ccsrc/transform/graph_ir/convert.h @@ -31,11 +31,11 @@ #include "ir/anf.h" #include "ir/func_graph.h" -#include "transform/util.h" +#include "transform/graph_ir/util.h" #include "ir/tensor.h" -#include "transform/df_graph_manager.h" +#include "transform/graph_ir/df_graph_manager.h" #include 
"utils/config_manager.h" -#include "transform/op_declare.h" +#include "transform/graph_ir/op_declare.h" #include "graph/operator_reg.h" #ifdef OPEN_SOURCE #include "ge/client/ge_api.h" @@ -201,6 +201,7 @@ class DfGraphConvertor { OperatorPtr ConvertParameter(AnfNodePtr node); Status TryConvertValueNodeToMultiConst(const ValueNodePtr node); OperatorPtr ConvertValueNode(ValueNodePtr node); + void GetCaseNodeInput(const CNodePtr node, const CNodePtr input_node); void ConvertTupleGetItem(const CNodePtr node); void GetDependOnParameterUse(const CNodePtr &node, const AnfNodePtr &src_node, const AnfNodePtr &dest_node, const std::shared_ptr> &src_ops_list, @@ -217,6 +218,8 @@ class DfGraphConvertor { void SetNodeInput(AnfNodePtr node); void SetOpControlInput(const AnfNodePtr node); void UpdateOpDesc(AnfNodePtr node); + void SetSubgraph(AnfNodePtr node); + void ProcessSubgraph(AnfNodePtr node, const std::vector &inputs); void BuildSaveCheckpointGraph(); void DrawCNode(const CNodePtr node, const OpAdapterPtr adpt); void UpdateDataOpDesc(const AnfNodePtr &it, const OperatorPtr &op) const; @@ -228,22 +231,26 @@ class DfGraphConvertor { std::shared_ptr save_ckp_graph_{nullptr}; std::shared_ptr restore_ckp_graph_{nullptr}; std::shared_ptr broadcast_graph_{nullptr}; + std::unordered_map branches_map_; std::unordered_map op_cache_; std::unordered_map> control_depend_cache_; /* record "tuple_getitem"<->"out_handler" mapping */ std::unordered_map out_handle_cache_; /* record "make_tuple"<->"out_handler vector" mapping */ std::unordered_map>> tuple_out_handle_cache_; + std::unordered_map>> case_input_handle_cache_; std::unordered_map params_; std::unordered_map vars_; std::vector> graph_outputs_; std::vector graph_const_inputs_; std::vector init_ops_; std::vector broadcast_ops_; + std::vector inputs_; OperatorPtr dataset_iter_getnext_; Status error_ = SUCCESS; bool training_ = false; bool distribute_ = false; + bool use_inputs_ = false; }; } // namespace transform } // namespace 
mindspore diff --git a/mindspore/ccsrc/transform/df_graph_manager.cc b/mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc similarity index 97% rename from mindspore/ccsrc/transform/df_graph_manager.cc rename to mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc index f62c3865877..29985d6784c 100644 --- a/mindspore/ccsrc/transform/df_graph_manager.cc +++ b/mindspore/ccsrc/transform/graph_ir/df_graph_manager.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "transform/df_graph_manager.h" +#include "transform/graph_ir/df_graph_manager.h" #include #include @@ -22,8 +22,8 @@ #include #include "securec/include/securec.h" -#include "pipeline/parse/python_adapter.h" -#include "pipeline/pipeline.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "pipeline/jit/pipeline.h" #include "utils/config_manager.h" #ifndef NO_DLIB #include "tdt/tsd_client.h" diff --git a/mindspore/ccsrc/transform/df_graph_manager.h b/mindspore/ccsrc/transform/graph_ir/df_graph_manager.h similarity index 98% rename from mindspore/ccsrc/transform/df_graph_manager.h rename to mindspore/ccsrc/transform/graph_ir/df_graph_manager.h index 2ca43d1f073..8a574b7a040 100644 --- a/mindspore/ccsrc/transform/df_graph_manager.h +++ b/mindspore/ccsrc/transform/graph_ir/df_graph_manager.h @@ -23,7 +23,7 @@ #include #include #include -#include "transform/types.h" +#include "transform/graph_ir/types.h" #include "ir/anf.h" namespace mindspore { diff --git a/mindspore/ccsrc/transform/graph_builder.cc b/mindspore/ccsrc/transform/graph_ir/graph_builder.cc similarity index 97% rename from mindspore/ccsrc/transform/graph_builder.cc rename to mindspore/ccsrc/transform/graph_ir/graph_builder.cc index 785c5c7f3a0..6ee45feef8c 100644 --- a/mindspore/ccsrc/transform/graph_builder.cc +++ b/mindspore/ccsrc/transform/graph_ir/graph_builder.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "transform/graph_builder.h" +#include "transform/graph_ir/graph_builder.h" #include #include diff --git a/mindspore/ccsrc/transform/graph_builder.h b/mindspore/ccsrc/transform/graph_ir/graph_builder.h similarity index 92% rename from mindspore/ccsrc/transform/graph_builder.h rename to mindspore/ccsrc/transform/graph_ir/graph_builder.h index 3d959f5a85c..5162674242c 100644 --- a/mindspore/ccsrc/transform/graph_builder.h +++ b/mindspore/ccsrc/transform/graph_ir/graph_builder.h @@ -22,8 +22,8 @@ #include #include #include -#include "transform/types.h" -#include "transform/convert.h" +#include "transform/graph_ir/types.h" +#include "transform/graph_ir/convert.h" namespace mindspore { namespace transform { diff --git a/mindspore/ccsrc/transform/graph_runner.cc b/mindspore/ccsrc/transform/graph_ir/graph_runner.cc similarity index 99% rename from mindspore/ccsrc/transform/graph_runner.cc rename to mindspore/ccsrc/transform/graph_ir/graph_runner.cc index 52d0d8e17fe..d20c49a3818 100644 --- a/mindspore/ccsrc/transform/graph_runner.cc +++ b/mindspore/ccsrc/transform/graph_ir/graph_runner.cc @@ -14,7 +14,7 @@ * Limitations under the License. 
*/ -#include "transform/graph_runner.h" +#include "transform/graph_ir/graph_runner.h" #include #include #include diff --git a/mindspore/ccsrc/transform/graph_runner.h b/mindspore/ccsrc/transform/graph_ir/graph_runner.h similarity index 93% rename from mindspore/ccsrc/transform/graph_runner.h rename to mindspore/ccsrc/transform/graph_ir/graph_runner.h index 30769c8310b..92db9e14135 100644 --- a/mindspore/ccsrc/transform/graph_runner.h +++ b/mindspore/ccsrc/transform/graph_ir/graph_runner.h @@ -23,10 +23,10 @@ #include #include -#include "transform/types.h" -#include "transform/util.h" +#include "transform/graph_ir/types.h" +#include "transform/graph_ir/util.h" #include "ir/tensor.h" -#include "transform/df_graph_manager.h" +#include "transform/graph_ir/df_graph_manager.h" namespace mindspore { namespace transform { diff --git a/mindspore/ccsrc/transform/op_adapter.h b/mindspore/ccsrc/transform/graph_ir/op_adapter.h similarity index 96% rename from mindspore/ccsrc/transform/op_adapter.h rename to mindspore/ccsrc/transform/graph_ir/op_adapter.h index ae678606a4b..358cbd20a1d 100644 --- a/mindspore/ccsrc/transform/op_adapter.h +++ b/mindspore/ccsrc/transform/graph_ir/op_adapter.h @@ -22,7 +22,7 @@ #include #include -#include "transform/op_adapter_util.h" +#include "transform/graph_ir/op_adapter_util.h" #include "utils/utils.h" namespace mindspore { namespace transform { @@ -164,6 +164,25 @@ class OpAdapter : public BaseOpAdapter { const std::unordered_map &getInputAttrMap() override { return input_attr_map_; } const std::unordered_map &getDynInputMap() override { return dyn_input_map_; } const std::unordered_map &getOutputMap() override { return output_map_; } + const std::unordered_map &getDynSubgraphMap() override { return dyn_subgraph_map_; } + + Status SetOpSubgraphFunc(const OperatorPtr &op, int index, std::shared_ptr> branches) { + MS_EXCEPTION_IF_NULL(op); + auto it = dyn_subgraph_map_.find(index); + if (it != dyn_subgraph_map_.end()) { + auto size = 
branches->size(); + it->second.create_dyn_subgraph(op, static_cast(size)); + for (size_t i = 0; i < size; i++) { + it->second.set_subgraph(op, static_cast(i), std::make_shared((*branches)[i])); + } + return SUCCESS; + } + return NOT_FOUND; + } + + int setSubgraph(const OperatorPtr &op, int index, std::shared_ptr> branches) override { + return static_cast(SetOpSubgraphFunc(op, index, branches)); + } Status SetCustomOpInput(const CusOperatorPtr &op, int index, const OperatorPtr &input) { MS_EXCEPTION_IF_NULL(op); @@ -855,6 +874,7 @@ class OpAdapter : public BaseOpAdapter { static const std::unordered_map dyn_input_map_; static const std::unordered_map output_map_; static const std::unordered_map dyn_output_map_; + static const std::unordered_map dyn_subgraph_map_; static const std::unordered_map attr_map_; static const std::unordered_map enum_map_; // convert input from anf graph to Attr in Operators @@ -874,6 +894,8 @@ const std::unordered_map OpAdapter::output_map_; template const std::unordered_map OpAdapter::dyn_output_map_; template +const std::unordered_map OpAdapter::dyn_subgraph_map_; +template const std::unordered_map OpAdapter::attr_map_; template const std::unordered_map OpAdapter::enum_map_; diff --git a/mindspore/ccsrc/transform/op_adapter_base.h b/mindspore/ccsrc/transform/graph_ir/op_adapter_base.h similarity index 90% rename from mindspore/ccsrc/transform/op_adapter_base.h rename to mindspore/ccsrc/transform/graph_ir/op_adapter_base.h index 01f96e251db..77e28dda94c 100644 --- a/mindspore/ccsrc/transform/op_adapter_base.h +++ b/mindspore/ccsrc/transform/graph_ir/op_adapter_base.h @@ -24,12 +24,11 @@ #include #include -#include "transform/util.h" +#include "transform/graph_ir/util.h" #include "ir/anf.h" #include "ir/primitive.h" #include "ir/value.h" -#include "transform/types.h" - +#include "transform/graph_ir/types.h" #ifdef ENABLE_GE #ifdef OPEN_SOURCE #include "graph/types.h" @@ -43,7 +42,7 @@ #include "external/ge/ge_api.h" #endif #include 
"graph/tensor.h" -#include "transform/all_ops.h" +#include "transform/graph_ir/all_ops.h" namespace ge { class CustomOperator : public Operator { @@ -88,6 +87,8 @@ using DynInputOpFunc = std::function; using UpdateOutputDescFunc = std::function; using CreateDynOutputOpFunc = std::function; +using CreateDynSubGraphFunc = std::function; +using DynSubGraphFunc = std::function; struct AttrDesc { std::string name; @@ -108,6 +109,12 @@ struct DynInputDesc { DynInputHandleFunc set_handle; }; +struct DynSubGraphDesc { + std::string name; + CreateDynSubGraphFunc create_dyn_subgraph; + DynSubGraphFunc set_subgraph; +}; + struct OutputDesc { std::string name; UpdateOutputDescFunc update_out_desc; @@ -123,6 +130,7 @@ class BaseOpAdapter { virtual ~BaseOpAdapter() {} virtual OperatorPtr generate(const AnfNodePtr &anf) = 0; virtual OperatorPtr generate(const std::string &type) { return std::make_shared(type); } + virtual int setSubgraph(const OperatorPtr &op, int index, std::shared_ptr> branches) = 0; virtual int setInput(const OperatorPtr &op, int index, const OperatorPtr &input) = 0; virtual int setInput(const OperatorPtr &op, int index, const OutHandler &handle) = 0; virtual int setInput(const OperatorPtr &op, int index, @@ -146,6 +154,7 @@ class BaseOpAdapter { virtual const std::unordered_map &getInputAttrMap() = 0; virtual const std::unordered_map &getDynInputMap() = 0; virtual const std::unordered_map &getOutputMap() = 0; + virtual const std::unordered_map &getDynSubgraphMap() = 0; void AddAttrToDrawGraph(const std::string &attr_str) { attrs_vec_.push_back(attr_str); } const std::vector &GetAttrsFromDrawGraph() const { return attrs_vec_; } void clearAttrVect() { attrs_vec_.clear(); } diff --git a/mindspore/ccsrc/transform/op_adapter_util.cc b/mindspore/ccsrc/transform/graph_ir/op_adapter_util.cc similarity index 99% rename from mindspore/ccsrc/transform/op_adapter_util.cc rename to mindspore/ccsrc/transform/graph_ir/op_adapter_util.cc index cae43c13dca..78f1f263de7 100644 
--- a/mindspore/ccsrc/transform/op_adapter_util.cc +++ b/mindspore/ccsrc/transform/graph_ir/op_adapter_util.cc @@ -14,14 +14,14 @@ * limitations under the License. */ -#include "transform/op_adapter_util.h" +#include "transform/graph_ir/op_adapter_util.h" #include #include #include #include "utils/utils.h" -#include "transform/op_adapter_base.h" +#include "transform/graph_ir/op_adapter_base.h" namespace mindspore { namespace transform { diff --git a/mindspore/ccsrc/transform/op_adapter_util.h b/mindspore/ccsrc/transform/graph_ir/op_adapter_util.h similarity index 98% rename from mindspore/ccsrc/transform/op_adapter_util.h rename to mindspore/ccsrc/transform/graph_ir/op_adapter_util.h index fcabc732d58..0a0d745ba22 100644 --- a/mindspore/ccsrc/transform/op_adapter_util.h +++ b/mindspore/ccsrc/transform/graph_ir/op_adapter_util.h @@ -20,7 +20,7 @@ #include #include -#include "transform/op_adapter_base.h" +#include "transform/graph_ir/op_adapter_base.h" namespace mindspore { namespace transform { diff --git a/mindspore/ccsrc/transform/graph_ir/op_declare.cc b/mindspore/ccsrc/transform/graph_ir/op_declare.cc new file mode 100644 index 00000000000..e3751e0c925 --- /dev/null +++ b/mindspore/ccsrc/transform/graph_ir/op_declare.cc @@ -0,0 +1,1330 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "transform/graph_ir/op_declare.h" + +#include + +#include "transform/graph_ir/all_ops.h" +#include "utils/utils.h" + +namespace mindspore { +namespace transform { +#define INPUT_MAP(T) \ + template <> \ + const std::unordered_map OpAdapter::input_map_ +#define EMPTY_INPUT_MAP std::unordered_map() +#define INPUT_DESC(name) \ + { \ +#name, \ + [](const OperatorPtr op, const OperatorPtr input) { \ + auto p = std::static_pointer_cast(op); \ + (void)p->set_input_##name(*input); \ + }, \ + [](const OperatorPtr op, const OutHandler& handle) { \ + auto p = std::static_pointer_cast(op); \ + (void)p->set_input_##name(*(handle.op), handle.out); \ + }, \ + [](const OperatorPtr op, const GeTensorDesc desc) { \ + auto p = std::static_pointer_cast(op); \ + (void)p->update_input_desc_##name(desc); \ + } \ + } + +#define DYN_INPUT_MAP(T) \ + template <> \ + const std::unordered_map OpAdapter::dyn_input_map_ +#define DYN_INPUT_DESC(name) \ + { \ +#name, \ + [](const OperatorPtr op, unsigned int num) { \ + auto p = std::static_pointer_cast(op); \ + (void)p->create_dynamic_input_##name(num); \ + }, \ + [](const OperatorPtr op, unsigned int index, const OperatorPtr input) { \ + auto p = std::static_pointer_cast(op); \ + (void)p->set_dynamic_input_##name(index, *input); \ + }, \ + [](const OperatorPtr op, unsigned int index, const OutHandler& handle) { \ + auto p = std::static_pointer_cast(op); \ + (void)p->set_dynamic_input_##name(index, *(handle.op), handle.out); \ + } \ + } + +#define DYN_SUBGRAPH_MAP(T) \ + template <> \ + const std::unordered_map OpAdapter::dyn_subgraph_map_ +#define DYN_SUBGRAPH_DESC(name) \ + { \ +#name, \ + [](const OperatorPtr op, unsigned int num) { \ + auto p = std::static_pointer_cast(op); \ + (void)p->create_dynamic_subgraph_##name(num); \ + }, \ + [](const OperatorPtr op, unsigned int index, const DfGraphPtr graph) { \ + auto p = std::static_pointer_cast(op); \ + (void)p->set_dynamic_subgraph_builder_##name(index, [graph](){return 
*graph;}); \ + } \ + } + +#define ATTR_MAP(T) \ + template <> \ + const std::unordered_map OpAdapter::attr_map_ +#define EMPTY_ATTR_MAP std::unordered_map() +#define ATTR_DESC(name, ...) \ + { \ +#name, \ + [](const OperatorPtr op, const ValuePtr& value) { \ + auto p = std::static_pointer_cast(op); \ + (void)p->set_attr_##name(ConvertAny(value, __VA_ARGS__)); \ + } \ + } + +#define INPUT_ATTR_MAP(T) \ + template <> \ + const std::unordered_map OpAdapter::input_attr_map_ + +#define OUTPUT_MAP(T) \ + template <> \ + const std::unordered_map OpAdapter::output_map_ +#define OUTPUT_DESC(name) \ + { \ +#name, \ + [](const OperatorPtr op, const GeTensorDesc desc) { \ + auto p = std::static_pointer_cast(op); \ + (void)p->update_output_desc_##name(desc); \ + } \ + } + +#define DYN_OUTPUT_MAP(T) \ + template <> \ + const std::unordered_map OpAdapter::dyn_output_map_ + +#define DYN_OUTPUT_DESC(name) \ + { \ +#name, \ + [](const OperatorPtr op, unsigned int num) { \ + auto p = std::static_pointer_cast(op); \ + (void)p->create_dynamic_output_##name(num); \ + } \ + } + +template <> +std::unordered_map> OpAdapter::cus_input_map_{}; +template <> +std::unordered_map> OpAdapter::cus_output_map_{}; + +// --------------specialization for each operator---------- +// const +INPUT_MAP(Const) = EMPTY_INPUT_MAP; +ATTR_MAP(Const) = {{"value", ATTR_DESC(value, AnyTraits())}}; +OUTPUT_MAP(Const) = {{0, OUTPUT_DESC(y)}}; + +// Assign +INPUT_MAP(Assign) = {{1, INPUT_DESC(ref)}, {2, INPUT_DESC(value)}}; +ATTR_MAP(Assign) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Assign) = {{0, OUTPUT_DESC(ref)}}; + +// Constant +INPUT_MAP(Constant) = EMPTY_INPUT_MAP; +ATTR_MAP(Constant) = {{"value", ATTR_DESC(value, AnyTraits())}}; +OUTPUT_MAP(Constant) = {{0, OUTPUT_DESC(y)}}; + +// ApplyMomentumD +INPUT_MAP(ApplyMomentumD) = { + {1, INPUT_DESC(var)}, {2, INPUT_DESC(accum)}, {3, INPUT_DESC(lr)}, {4, INPUT_DESC(grad)}, {5, INPUT_DESC(momentum)}}; +ATTR_MAP(ApplyMomentumD) = {{"use_nesterov", ATTR_DESC(use_nesterov, 
AnyTraits())}, + {"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; +OUTPUT_MAP(ApplyMomentumD) = {{0, OUTPUT_DESC(var)}, {1, OUTPUT_DESC(accum)}}; + +// ScalarSummary +INPUT_MAP(Summary) = {{2, INPUT_DESC(x)}}; +ATTR_MAP(Summary) = EMPTY_ATTR_MAP; + +// Data +INPUT_MAP(Data) = EMPTY_INPUT_MAP; +ATTR_MAP(Data) = EMPTY_ATTR_MAP; + +// BatchNorm +INPUT_MAP(BatchNorm) = {{1, INPUT_DESC(x)}, + {2, INPUT_DESC(scale)}, + {3, INPUT_DESC(offset)}, + {4, INPUT_DESC(mean)}, + {5, INPUT_DESC(variance)}}; +ATTR_MAP(BatchNorm) = {{"data_format", ATTR_DESC(data_format, AnyTraits())}, + {"epsilon", ATTR_DESC(epsilon, AnyTraits())}, + {"is_training", ATTR_DESC(is_training, AnyTraits())}}; +OUTPUT_MAP(BatchNorm) = {{0, OUTPUT_DESC(y)}, + {1, OUTPUT_DESC(batch_mean)}, + {2, OUTPUT_DESC(batch_variance)}, + {3, OUTPUT_DESC(reserve_space_1)}, + {4, OUTPUT_DESC(reserve_space_2)}}; + +// BatchNormGrad +INPUT_MAP(BatchNormGrad) = {{1, INPUT_DESC(y_backprop)}, + {2, INPUT_DESC(x)}, + {3, INPUT_DESC(scale)}, + {4, INPUT_DESC(reserve_space_1)}, + {5, INPUT_DESC(reserve_space_2)}}; +ATTR_MAP(BatchNormGrad) = {{"data_format", ATTR_DESC(data_format, AnyTraits())}, + {"epsilon", ATTR_DESC(epsilon, AnyTraits())}, + {"is_training", ATTR_DESC(is_training, AnyTraits())}}; +OUTPUT_MAP(BatchNormGrad) = {{0, OUTPUT_DESC(x_backprop)}, + {1, OUTPUT_DESC(scale_backprop)}, + {2, OUTPUT_DESC(offset_backprop)}, + {3, OUTPUT_DESC(reserve_space_4)}, + {4, OUTPUT_DESC(reserve_space_5)}}; + +// Relu +INPUT_MAP(Relu) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Relu) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Relu) = {{0, OUTPUT_DESC(y)}}; + +// Elu +INPUT_MAP(Elu) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Elu) = {{"alpha", ATTR_DESC(alpha, AnyTraits())}}; +OUTPUT_MAP(Elu) = {{0, OUTPUT_DESC(y)}}; + +// EluGrad +INPUT_MAP(EluGrad) = {{1, INPUT_DESC(grads)}, {2, INPUT_DESC(activations)}}; +ATTR_MAP(EluGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(EluGrad) = {{0, OUTPUT_DESC(y)}}; + +// PRelu +INPUT_MAP(PRelu) = {{1, INPUT_DESC(x)}, {2, 
INPUT_DESC(weight)}}; +ATTR_MAP(PRelu) = EMPTY_ATTR_MAP; +OUTPUT_MAP(PRelu) = {{0, OUTPUT_DESC(y)}}; + +// PReluGrad +INPUT_MAP(PReluGrad) = {{1, INPUT_DESC(grads)}, {2, INPUT_DESC(features)}, {3, INPUT_DESC(weights)}}; +ATTR_MAP(PReluGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(PReluGrad) = {{0, OUTPUT_DESC(dx)}, {1, OUTPUT_DESC(da)}}; + +// Sigmoid +INPUT_MAP(Sigmoid) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Sigmoid) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Sigmoid) = {{0, OUTPUT_DESC(y)}}; + +// SigmoidGrad +INPUT_MAP(SigmoidGrad) = {{1, INPUT_DESC(y)}, {2, INPUT_DESC(dy)}}; +ATTR_MAP(SigmoidGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(SigmoidGrad) = {{0, OUTPUT_DESC(z)}}; + +// L2NormalizeGrad +INPUT_MAP(L2NormalizeGrad) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(y)}, {3, INPUT_DESC(dy)}}; +ATTR_MAP(L2NormalizeGrad) = { + {"axis", ATTR_DESC(dim, AnyTraits>(), AnyTraits>())}, + {"epsilon", ATTR_DESC(eps, AnyTraits())}}; +OUTPUT_MAP(L2NormalizeGrad) = {{0, OUTPUT_DESC(dx)}}; + +// LarsV2Update +INPUT_MAP(LarsV2Update) = {{1, INPUT_DESC(w)}, + {2, INPUT_DESC(g)}, + {3, INPUT_DESC(w_square_sum)}, + {4, INPUT_DESC(g_square_sum)}, + {5, INPUT_DESC(weight_decay)}, + {6, INPUT_DESC(learning_rate)}}; +ATTR_MAP(LarsV2Update) = {{"epsilon", ATTR_DESC(epsilon, AnyTraits())}, + {"hyperpara", ATTR_DESC(hyperpara, AnyTraits())}, + {"use_clip", ATTR_DESC(use_clip, AnyTraits())}}; +OUTPUT_MAP(LarsV2Update) = {{0, OUTPUT_DESC(g_new)}}; + +// L2Normalize +INPUT_MAP(L2Normalize) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(L2Normalize) = { + {"axis", ATTR_DESC(axis, AnyTraits>(), AnyTraits>())}, + {"epsilon", ATTR_DESC(eps, AnyTraits())}}; +OUTPUT_MAP(L2Normalize) = {{0, OUTPUT_DESC(y)}}; + +// CumsumD +INPUT_MAP(CumsumD) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(CumsumD) = {{2, ATTR_DESC(axis, AnyTraits())}}; +ATTR_MAP(CumsumD) = {{"exclusive", ATTR_DESC(exclusive, AnyTraits())}, + {"reverse", ATTR_DESC(reverse, AnyTraits())}}; +OUTPUT_MAP(CumsumD) = {{0, OUTPUT_DESC(y)}}; + +// SoftmaxV2 +INPUT_MAP(SoftmaxV2) = {{1, 
INPUT_DESC(x)}}; +ATTR_MAP(SoftmaxV2) = { + {"axis", ATTR_DESC(axes, AnyTraits>(), AnyTraits>())}, +}; +OUTPUT_MAP(SoftmaxV2) = {{0, OUTPUT_DESC(y)}}; + +// SoftmaxGrad +INPUT_MAP(SoftmaxGrad) = {{1, INPUT_DESC(softmax)}, {2, INPUT_DESC(grad_softmax)}}; +OUTPUT_MAP(SoftmaxGrad) = {{0, OUTPUT_DESC(grad_x)}}; +ATTR_MAP(SoftmaxGrad) = EMPTY_ATTR_MAP; + +// Flatten +INPUT_MAP(Flatten) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Flatten) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Flatten) = {{0, OUTPUT_DESC(y)}}; + +// add +INPUT_MAP(Add) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(Add) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Add) = {{0, OUTPUT_DESC(y)}}; + +// GatherV2 +INPUT_MAP(GatherV2) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(indices)}, {3, INPUT_DESC(axis)}}; +ATTR_MAP(GatherV2) = EMPTY_ATTR_MAP; +OUTPUT_MAP(GatherV2) = {{0, OUTPUT_DESC(y)}}; + +// ReduceSumD +INPUT_MAP(ReduceSumD) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(ReduceSumD) = { + {2, ATTR_DESC(axes, AnyTraits>(), AnyTraits>())}}; +ATTR_MAP(ReduceSumD) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits())}}; +OUTPUT_MAP(ReduceSumD) = {{0, OUTPUT_DESC(y)}}; + +// ReduceProdD +INPUT_MAP(ReduceProdD) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(ReduceProdD) = { + {2, ATTR_DESC(axes, AnyTraits>(), AnyTraits>())}}; +ATTR_MAP(ReduceProdD) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits())}}; +OUTPUT_MAP(ReduceProdD) = {{0, OUTPUT_DESC(y)}}; + +// CumprodD +INPUT_MAP(CumprodD) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(CumprodD) = {{2, ATTR_DESC(axis, AnyTraits())}}; +ATTR_MAP(CumprodD) = {{"exclusive", ATTR_DESC(exclusive, AnyTraits())}, + {"reverse", ATTR_DESC(reverse, AnyTraits())}}; +OUTPUT_MAP(CumprodD) = {{0, OUTPUT_DESC(y)}}; + +// SoftmaxCrossEntropyWithLogits +INPUT_MAP(SoftmaxCrossEntropyWithLogits) = {{1, INPUT_DESC(features)}, {2, INPUT_DESC(labels)}}; +ATTR_MAP(SoftmaxCrossEntropyWithLogits) = EMPTY_ATTR_MAP; +OUTPUT_MAP(SoftmaxCrossEntropyWithLogits) = {{0, OUTPUT_DESC(loss)}, {1, OUTPUT_DESC(backprop)}}; + +// MeanGrad 
+INPUT_MAP(MeanGrad) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(MeanGrad) = {{2, ATTR_DESC(mean_grad_output_shape_value, kOpFormat_NHWC, + AnyTraits>(), AnyTraits())}}; +ATTR_MAP(MeanGrad) = {{"mode", ATTR_DESC(mode, AnyTraits())}}; + +INPUT_MAP(SliceD) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(SliceD) = {{2, ATTR_DESC(offsets, AnyTraits(), AnyTraits>())}, + {3, ATTR_DESC(size, AnyTraits(), AnyTraits>())}}; +ATTR_MAP(SliceD) = EMPTY_ATTR_MAP; +OUTPUT_MAP(SliceD) = {{0, OUTPUT_DESC(y)}}; + +// MaxPool +INPUT_MAP(MaxPool) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(MaxPool) = {{"ksize", ATTR_DESC(ksize, AnyTraits(), AnyTraits>())}, + {"strides", ATTR_DESC(strides, AnyTraits(), AnyTraits>())}, + {"padding", ATTR_DESC(padding, AnyTraits())}, + {"data_format", ATTR_DESC(data_format, AnyTraits())}}; +OUTPUT_MAP(MaxPool) = {{0, OUTPUT_DESC(y)}}; + +// AvgPool +INPUT_MAP(AvgPool) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(AvgPool) = {{"ksize", ATTR_DESC(ksize, AnyTraits(), AnyTraits>())}, + {"strides", ATTR_DESC(strides, AnyTraits(), AnyTraits>())}, + {"padding", ATTR_DESC(padding, AnyTraits())}, + {"data_format", ATTR_DESC(data_format, AnyTraits())}}; +OUTPUT_MAP(AvgPool) = {{0, OUTPUT_DESC(y)}}; + +// GreaterEqual +INPUT_MAP(GreaterEqual) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(GreaterEqual) = EMPTY_ATTR_MAP; +OUTPUT_MAP(GreaterEqual) = {{0, OUTPUT_DESC(y)}}; + +// AssignAdd +INPUT_MAP(AssignAdd) = {{1, INPUT_DESC(ref)}, {2, INPUT_DESC(value)}}; +ATTR_MAP(AssignAdd) = EMPTY_ATTR_MAP; +OUTPUT_MAP(AssignAdd) = {{0, OUTPUT_DESC(ref)}}; + +// AssignSub +INPUT_MAP(AssignSub) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(value)}}; +ATTR_MAP(AssignSub) = EMPTY_ATTR_MAP; +OUTPUT_MAP(AssignSub) = {{0, OUTPUT_DESC(var)}}; + +// Cos +INPUT_MAP(Cos) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Cos) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Cos) = {{0, OUTPUT_DESC(y)}}; + +// Acos +INPUT_MAP(Acos) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Acos) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Acos) = {{0, OUTPUT_DESC(y)}}; + +// AcosGrad 
+INPUT_MAP(AcosGrad) = {{1, INPUT_DESC(y)}, {2, INPUT_DESC(dy)}}; +ATTR_MAP(AcosGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(AcosGrad) = {{0, OUTPUT_DESC(z)}}; + +// Acosh +INPUT_MAP(Acosh) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Acosh) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Acosh) = {{0, OUTPUT_DESC(y)}}; + +// AcoshGrad +INPUT_MAP(AcoshGrad) = {{1, INPUT_DESC(y)}, {2, INPUT_DESC(dy)}}; +ATTR_MAP(AcoshGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(AcoshGrad) = {{0, OUTPUT_DESC(z)}}; + +// Floor +INPUT_MAP(Floor) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Floor) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Floor) = {{0, OUTPUT_DESC(y)}}; + +// FloorDiv +INPUT_MAP(FloorDiv) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(FloorDiv) = EMPTY_ATTR_MAP; +OUTPUT_MAP(FloorDiv) = {{0, OUTPUT_DESC(y)}}; + +// FloorMod +INPUT_MAP(FloorMod) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(FloorMod) = EMPTY_ATTR_MAP; +OUTPUT_MAP(FloorMod) = {{0, OUTPUT_DESC(y)}}; + +// Sin +INPUT_MAP(Sin) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Sin) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Sin) = {{0, OUTPUT_DESC(y)}}; + +// Exp +INPUT_MAP(Exp) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Exp) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Exp) = {{0, OUTPUT_DESC(y)}}; + +// BoundingBoxEncode +INPUT_MAP(BoundingBoxEncode) = { + {1, INPUT_DESC(anchor_box)}, + {2, INPUT_DESC(ground_truth_box)}, +}; +ATTR_MAP(BoundingBoxEncode) = { + {"means", ATTR_DESC(means, AnyTraits>(), AnyTraits())}, + {"stds", ATTR_DESC(stds, AnyTraits>(), AnyTraits())}, +}; +OUTPUT_MAP(BoundingBoxEncode) = {{0, OUTPUT_DESC(delats)}}; + +// BoundingBoxDecode +INPUT_MAP(BoundingBoxDecode) = { + {1, INPUT_DESC(rois)}, + {2, INPUT_DESC(deltas)}, +}; +ATTR_MAP(BoundingBoxDecode) = { + {"means", ATTR_DESC(means, AnyTraits>(), AnyTraits())}, + {"stds", ATTR_DESC(stds, AnyTraits>(), AnyTraits())}, + {"max_shape", ATTR_DESC(max_shape, AnyTraits>(), AnyTraits>())}, + {"wh_ratio_clip", ATTR_DESC(wh_ratio_clip, AnyTraits())}, +}; +OUTPUT_MAP(BoundingBoxDecode) = {{0, OUTPUT_DESC(bboxes)}}; + +// TopK +INPUT_MAP(TopK) = 
{{1, INPUT_DESC(x)}, {2, INPUT_DESC(k)}}; +ATTR_MAP(TopK) = {{"sorted", ATTR_DESC(sorted, AnyTraits())}}; +OUTPUT_MAP(TopK) = {{0, OUTPUT_DESC(values)}, {1, OUTPUT_DESC(indices)}}; + +// Multiply +INPUT_MAP(Multiply) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(y)}}; +ATTR_MAP(Multiply) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Multiply) = {{0, OUTPUT_DESC(z)}}; + +// TileD +INPUT_MAP(TileD) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(TileD) = {{2, ATTR_DESC(multiples, AnyTraits(), AnyTraits>())}}; +ATTR_MAP(TileD) = EMPTY_ATTR_MAP; +OUTPUT_MAP(TileD) = {{0, OUTPUT_DESC(y)}}; + +// OneHot +INPUT_MAP(OneHot) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(depth)}, {3, INPUT_DESC(on_value)}, {4, INPUT_DESC(off_value)}}; +ATTR_MAP(OneHot) = {{"axis", ATTR_DESC(axis, AnyTraits())}}; +OUTPUT_MAP(OneHot) = {{0, OUTPUT_DESC(y)}}; + +// GatherV2D +INPUT_MAP(GatherV2D) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(indices)}}; +INPUT_ATTR_MAP(GatherV2D) = {{3, ATTR_DESC(axis, AnyTraits())}}; +ATTR_MAP(GatherV2D) = EMPTY_ATTR_MAP; +OUTPUT_MAP(GatherV2D) = {{0, OUTPUT_DESC(y)}}; + +// Reshape +INPUT_MAP(Reshape) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(shape)}}; +ATTR_MAP(Reshape) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Reshape) = {{0, OUTPUT_DESC(y)}}; + +// TransShape +INPUT_MAP(TransShape) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(TransShape) = {{2, ATTR_DESC(outShape, AnyTraits(), AnyTraits>())}}; +ATTR_MAP(TransShape) = EMPTY_ATTR_MAP; +OUTPUT_MAP(TransShape) = {{0, OUTPUT_DESC(y)}}; + +// BiasAdd +INPUT_MAP(BiasAdd) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(bias)}}; +ATTR_MAP(BiasAdd) = {{"data_format", ATTR_DESC(data_format, AnyTraits())}}; +OUTPUT_MAP(BiasAdd) = {{0, OUTPUT_DESC(y)}}; + +// Iou +INPUT_MAP(Iou) = {{1, INPUT_DESC(bboxes)}, {2, INPUT_DESC(gtboxes)}}; +ATTR_MAP(Iou) = {{"mode", ATTR_DESC(mode, AnyTraits())}}; +OUTPUT_MAP(Iou) = {{0, OUTPUT_DESC(overlap)}}; + +// ResizeNearestNeighborV2D +INPUT_MAP(ResizeNearestNeighborV2D) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(ResizeNearestNeighborV2D) = { + {"size", ATTR_DESC(size, 
AnyTraits>(), AnyTraits>())}, + {"align_corners", ATTR_DESC(align_corners, AnyTraits())}}; +OUTPUT_MAP(ResizeNearestNeighborV2D) = {{0, OUTPUT_DESC(y)}}; + +// ResizeNearestNeighborV2Grad +INPUT_MAP(ResizeNearestNeighborV2Grad) = {{1, INPUT_DESC(grads)}, {2, INPUT_DESC(size)}}; +ATTR_MAP(ResizeNearestNeighborV2Grad) = {{"align_corners", ATTR_DESC(align_corners, AnyTraits())}}; +OUTPUT_MAP(ResizeNearestNeighborV2Grad) = {{0, OUTPUT_DESC(y)}}; + +// ApplyAdam +INPUT_MAP(ApplyAdam) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(m)}, {3, INPUT_DESC(v)}, + {4, INPUT_DESC(beta1_power)}, {5, INPUT_DESC(beta2_power)}, {6, INPUT_DESC(lr)}, + {7, INPUT_DESC(beta1)}, {8, INPUT_DESC(beta2)}, {9, INPUT_DESC(epsilon)}, + {10, INPUT_DESC(grad)}}; +ATTR_MAP(ApplyAdam) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}, + {"use_nesterov", ATTR_DESC(use_nesterov, AnyTraits())}}; +OUTPUT_MAP(ApplyAdam) = {{0, OUTPUT_DESC(var)}}; + +// ApplyAdamD +INPUT_MAP(ApplyAdamD) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(m)}, {3, INPUT_DESC(v)}, + {4, INPUT_DESC(beta1_power)}, {5, INPUT_DESC(beta2_power)}, {6, INPUT_DESC(lr)}, + {7, INPUT_DESC(beta1)}, {8, INPUT_DESC(beta2)}, {9, INPUT_DESC(epsilon)}, + {10, INPUT_DESC(grad)}}; +ATTR_MAP(ApplyAdamD) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}, + {"use_nesterov", ATTR_DESC(use_nesterov, AnyTraits())}}; +OUTPUT_MAP(ApplyAdamD) = {{0, OUTPUT_DESC(var)}, {1, OUTPUT_DESC(m)}, {2, OUTPUT_DESC(v)}}; + +// Relu6 +INPUT_MAP(Relu6) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Relu6) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Relu6) = {{0, OUTPUT_DESC(y)}}; + +// Relu6Grad +INPUT_MAP(Relu6Grad) = {{1, INPUT_DESC(gradients)}, {2, INPUT_DESC(features)}}; +ATTR_MAP(Relu6Grad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Relu6Grad) = {{0, OUTPUT_DESC(backprops)}}; + +// ResizeBilinearV2Grad +INPUT_MAP(ResizeBilinearV2Grad) = {{1, INPUT_DESC(grads)}, {2, INPUT_DESC(original_image)}}; +ATTR_MAP(ResizeBilinearV2Grad) = {{"align_corners", ATTR_DESC(align_corners, AnyTraits())}}; 
+OUTPUT_MAP(ResizeBilinearV2Grad) = {{0, OUTPUT_DESC(y)}}; + +// ResizeBilinearV2D +INPUT_MAP(ResizeBilinearV2D) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(ResizeBilinearV2D) = { + {"size", ATTR_DESC(size, AnyTraits>(), AnyTraits>())}, + {"align_corners", ATTR_DESC(align_corners, AnyTraits())}}; +OUTPUT_MAP(ResizeBilinearV2D) = {{0, OUTPUT_DESC(y)}}; + +// ZerosLike +INPUT_MAP(ZerosLike) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(ZerosLike) = EMPTY_ATTR_MAP; +OUTPUT_MAP(ZerosLike) = {{0, OUTPUT_DESC(y)}}; + +// OnesLike +INPUT_MAP(OnesLike) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(OnesLike) = EMPTY_ATTR_MAP; +OUTPUT_MAP(OnesLike) = {{0, OUTPUT_DESC(y)}}; + +// NMSWithMask +INPUT_MAP(NMSWithMask) = {{1, INPUT_DESC(box_scores)}}; +ATTR_MAP(NMSWithMask) = {{"iou_threshold", ATTR_DESC(iou_threshold, AnyTraits())}}; +OUTPUT_MAP(NMSWithMask) = { + {0, OUTPUT_DESC(selected_boxes)}, {1, OUTPUT_DESC(selected_idx)}, {2, OUTPUT_DESC(selected_mask)}}; + +// Unpack +INPUT_MAP(Unpack) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Unpack) = {{"axis", ATTR_DESC(axis, AnyTraits())}, {"num", ATTR_DESC(num, AnyTraits())}}; +DYN_OUTPUT_MAP(Unpack) = {{0, DYN_OUTPUT_DESC(y)}}; + +// TensorScatterUpdate +INPUT_MAP(TensorScatterUpdate) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(indices)}, {3, INPUT_DESC(updates)}}; +ATTR_MAP(TensorScatterUpdate) = EMPTY_ATTR_MAP; +OUTPUT_MAP(TensorScatterUpdate) = {{0, OUTPUT_DESC(y)}}; + +// ScatterUpdate +INPUT_MAP(ScatterUpdate) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(indices)}, {3, INPUT_DESC(updates)}}; +ATTR_MAP(ScatterUpdate) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; +OUTPUT_MAP(ScatterUpdate) = {{0, OUTPUT_DESC(var)}}; + +// ScatterNdUpdate +INPUT_MAP(ScatterNdUpdate) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(indices)}, {3, INPUT_DESC(updates)}}; +ATTR_MAP(ScatterNdUpdate) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; +OUTPUT_MAP(ScatterNdUpdate) = {{0, OUTPUT_DESC(var)}}; + +// ScatterMax +INPUT_MAP(ScatterMax) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(indices)}, 
{3, INPUT_DESC(updates)}}; +ATTR_MAP(ScatterMax) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; +OUTPUT_MAP(ScatterMax) = {{0, OUTPUT_DESC(var)}}; + +// CheckValid +INPUT_MAP(CheckValid) = {{1, INPUT_DESC(bbox_tensor)}, {2, INPUT_DESC(img_metas)}}; +ATTR_MAP(CheckValid) = EMPTY_ATTR_MAP; +OUTPUT_MAP(CheckValid) = {{0, OUTPUT_DESC(valid_tensor)}}; + +// SmoothL1Loss +INPUT_MAP(SmoothL1Loss) = {{1, INPUT_DESC(predict)}, {2, INPUT_DESC(label)}}; +ATTR_MAP(SmoothL1Loss) = {{"sigma", ATTR_DESC(sigma, AnyTraits())}}; +OUTPUT_MAP(SmoothL1Loss) = {{0, OUTPUT_DESC(loss)}}; + +// SmoothL1LossGrad +INPUT_MAP(SmoothL1LossGrad) = {{1, INPUT_DESC(predict)}, {2, INPUT_DESC(label)}, {3, INPUT_DESC(dout)}}; +ATTR_MAP(SmoothL1LossGrad) = {{"sigma", ATTR_DESC(sigma, AnyTraits())}}; +OUTPUT_MAP(SmoothL1LossGrad) = {{0, OUTPUT_DESC(gradient)}}; + +// SigmoidCrossEntropyWithLogits +INPUT_MAP(SigmoidCrossEntropyWithLogits) = {{1, INPUT_DESC(predict)}, {2, INPUT_DESC(target)}}; +ATTR_MAP(SigmoidCrossEntropyWithLogits) = EMPTY_ATTR_MAP; +OUTPUT_MAP(SigmoidCrossEntropyWithLogits) = {{0, OUTPUT_DESC(loss)}}; + +// SigmoidCrossEntropyWithLogitsGrad +INPUT_MAP(SigmoidCrossEntropyWithLogitsGrad) = { + {1, INPUT_DESC(predict)}, {2, INPUT_DESC(target)}, {3, INPUT_DESC(dout)}}; +ATTR_MAP(SigmoidCrossEntropyWithLogitsGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(SigmoidCrossEntropyWithLogitsGrad) = {{0, OUTPUT_DESC(gradient)}}; + +// ScatterNdD +INPUT_MAP(ScatterNdD) = {{1, INPUT_DESC(indices)}, {2, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(ScatterNdD) = { + {3, ATTR_DESC(shape, AnyTraits>(), AnyTraits>())}}; +ATTR_MAP(ScatterNdD) = EMPTY_ATTR_MAP; +OUTPUT_MAP(ScatterNdD) = {{0, OUTPUT_DESC(y)}}; + +// PadD +INPUT_MAP(PadD) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(PadD) = {{"paddings", ATTR_DESC(paddings, AnyTraits>>())}}; +OUTPUT_MAP(PadD) = {{0, OUTPUT_DESC(y)}}; + +// MirrorPad +INPUT_MAP(MirrorPad) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(paddings)}}; +ATTR_MAP(MirrorPad) = {{"mode", ATTR_DESC(mode, 
AnyTraits())}}; +OUTPUT_MAP(MirrorPad) = {{0, OUTPUT_DESC(y)}}; + +// MirrorPadGrad +INPUT_MAP(MirrorPadGrad) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(paddings)}}; +ATTR_MAP(MirrorPadGrad) = {{"mode", ATTR_DESC(mode, AnyTraits())}}; +OUTPUT_MAP(MirrorPadGrad) = {{0, OUTPUT_DESC(y)}}; + +// GatherNd +INPUT_MAP(GatherNd) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(indices)}}; +ATTR_MAP(GatherNd) = EMPTY_ATTR_MAP; +OUTPUT_MAP(GatherNd) = {{0, OUTPUT_DESC(y)}}; + +// ROIAlign +INPUT_MAP(ROIAlign) = {{1, INPUT_DESC(features)}, {2, INPUT_DESC(rois)}}; +OUTPUT_MAP(ROIAlign) = {{0, OUTPUT_DESC(y)}}; +ATTR_MAP(ROIAlign) = {{"pooled_height", ATTR_DESC(pooled_height, AnyTraits())}, + {"pooled_width", ATTR_DESC(pooled_width, AnyTraits())}, + {"spatial_scale", ATTR_DESC(spatial_scale, AnyTraits())}, + {"sample_num", ATTR_DESC(sample_num, AnyTraits())}, + {"roi_end_mode", ATTR_DESC(roi_end_mode, AnyTraits())}}; + +// ROIAlignGrad +INPUT_MAP(ROIAlignGrad) = {{1, INPUT_DESC(ydiff)}, {2, INPUT_DESC(rois)}}; +OUTPUT_MAP(ROIAlignGrad) = {{0, OUTPUT_DESC(xdiff)}}; +ATTR_MAP(ROIAlignGrad) = { + {"xdiff_shape", ATTR_DESC(xdiff_shape, AnyTraits>(), AnyTraits>())}, + {"pooled_height", ATTR_DESC(pooled_height, AnyTraits())}, + {"pooled_width", ATTR_DESC(pooled_width, AnyTraits())}, + {"spatial_scale", ATTR_DESC(spatial_scale, AnyTraits())}, + {"sample_num", ATTR_DESC(sample_num, AnyTraits())}}; + +// ArgMaxD +INPUT_MAP(ArgMaxD) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(ArgMaxD) = {{"axis", ATTR_DESC(dimension, AnyTraits())}, + {"output_type", ATTR_DESC(dtype, AnyTraits())}}; +OUTPUT_MAP(ArgMaxD) = {{0, OUTPUT_DESC(y)}}; + +// ArgMinD +INPUT_MAP(ArgMinD) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(ArgMinD) = {{"axis", ATTR_DESC(dimension, AnyTraits())}, + {"output_type", ATTR_DESC(dtype, AnyTraits())}}; +OUTPUT_MAP(ArgMinD) = {{0, OUTPUT_DESC(y)}}; + +// ArgMaxWithValue +INPUT_MAP(ArgMaxWithValue) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(ArgMaxWithValue) = {{"axis", ATTR_DESC(dimension, AnyTraits())}, + {"keep_dims", 
ATTR_DESC(keep_dims, AnyTraits())}}; +OUTPUT_MAP(ArgMaxWithValue) = {{0, OUTPUT_DESC(indice)}, {1, OUTPUT_DESC(values)}}; + +// ArgMinWithValue +INPUT_MAP(ArgMinWithValue) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(ArgMinWithValue) = {{"axis", ATTR_DESC(dimension, AnyTraits())}, + {"keep_dims", ATTR_DESC(keep_dims, AnyTraits())}}; +OUTPUT_MAP(ArgMinWithValue) = {{0, OUTPUT_DESC(indice)}, {1, OUTPUT_DESC(values)}}; + +// ReduceAllD +INPUT_MAP(ReduceAllD) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(ReduceAllD) = { + {2, ATTR_DESC(axes, AnyTraits>(), AnyTraits>())}}; +ATTR_MAP(ReduceAllD) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits())}}; +OUTPUT_MAP(ReduceAllD) = {{0, OUTPUT_DESC(y)}}; + +// ReduceMeanD +INPUT_MAP(ReduceMeanD) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(ReduceMeanD) = { + {2, ATTR_DESC(axes, AnyTraits>(), AnyTraits>())}}; +ATTR_MAP(ReduceMeanD) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits())}}; +OUTPUT_MAP(ReduceMeanD) = {{0, OUTPUT_DESC(y)}}; + +// HCOMAllreduce +INPUT_MAP(HcomAllReduce) = {{1, INPUT_DESC(x)}}; +OUTPUT_MAP(HcomAllReduce) = {{0, OUTPUT_DESC(y)}}; +ATTR_MAP(HcomAllReduce) = {{"op", ATTR_DESC(reduction, AnyTraits())}, + {"group", ATTR_DESC(group, AnyTraits())}, + {"fusion", ATTR_DESC(fusion, AnyTraits())}}; + +// HCOMBraodcast +INPUT_MAP(HcomBroadcast) = EMPTY_INPUT_MAP; +DYN_INPUT_MAP(HcomBroadcast) = {{1, DYN_INPUT_DESC(x)}}; +DYN_OUTPUT_MAP(HcomBroadcast) = {{0, DYN_OUTPUT_DESC(y)}}; +ATTR_MAP(HcomBroadcast) = {{"root_rank", ATTR_DESC(root_rank, AnyTraits())}, + {"group", ATTR_DESC(group, AnyTraits())}}; + +// HCOMAllreduce +INPUT_MAP(HcomAllGather) = {{1, INPUT_DESC(x)}}; +OUTPUT_MAP(HcomAllGather) = {{0, OUTPUT_DESC(y)}}; +ATTR_MAP(HcomAllGather) = {{"group", ATTR_DESC(group, AnyTraits())}, + {"rank_size", ATTR_DESC(rank_size, AnyTraits())}}; + +// HCOMReduceScatter +INPUT_MAP(HcomReduceScatter) = {{1, INPUT_DESC(x)}}; +OUTPUT_MAP(HcomReduceScatter) = {{0, OUTPUT_DESC(y)}}; +ATTR_MAP(HcomReduceScatter) = {{"group", ATTR_DESC(group, 
AnyTraits())}, + {"op", ATTR_DESC(reduction, AnyTraits())}, + {"rank_size", ATTR_DESC(rank_size, AnyTraits())}}; + +// Variable +INPUT_MAP(Variable) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Variable) = EMPTY_ATTR_MAP; + +// ReluGrad +INPUT_MAP(ReluGrad) = {{1, INPUT_DESC(gradients)}, {2, INPUT_DESC(features)}}; +ATTR_MAP(ReluGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(ReluGrad) = {{0, OUTPUT_DESC(backprops)}}; + +// BiasAddGrad +INPUT_MAP(BiasAddGrad) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(BiasAddGrad) = {{"data_format", ATTR_DESC(data_format, AnyTraits())}}; +OUTPUT_MAP(BiasAddGrad) = {{0, OUTPUT_DESC(y)}}; + +// MaxPoolGrad +INPUT_MAP(MaxPoolGrad) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}, {3, INPUT_DESC(grad)}}; +ATTR_MAP(MaxPoolGrad) = {{"ksize", ATTR_DESC(ksize, AnyTraits(), AnyTraits>())}, + {"strides", ATTR_DESC(strides, AnyTraits(), AnyTraits>())}, + {"padding", ATTR_DESC(padding, AnyTraits())}, + {"data_format", ATTR_DESC(data_format, AnyTraits())}}; +OUTPUT_MAP(MaxPoolGrad) = {{0, OUTPUT_DESC(y)}}; + +// avgpoolgrad +INPUT_MAP(AvgPoolGrad) = {{1, INPUT_DESC(orig_input_shape)}, {2, INPUT_DESC(input_grad)}}; +ATTR_MAP(AvgPoolGrad) = {{"ksize", ATTR_DESC(ksize, AnyTraits(), AnyTraits>())}, + {"strides", ATTR_DESC(strides, AnyTraits(), AnyTraits>())}, + {"padding", ATTR_DESC(padding, AnyTraits())}, + {"data_format", ATTR_DESC(data_format, AnyTraits())}}; +OUTPUT_MAP(AvgPoolGrad) = {{0, OUTPUT_DESC(out_grad)}}; + +// MaxPoolWithArgmax +INPUT_MAP(MaxPoolWithArgmax) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(MaxPoolWithArgmax) = {{"ksize", ATTR_DESC(ksize, AnyTraits(), AnyTraits>())}, + {"strides", ATTR_DESC(strides, AnyTraits(), AnyTraits>())}, + {"padding", ATTR_DESC(padding, AnyTraits())}}; +OUTPUT_MAP(MaxPoolWithArgmax) = {{0, OUTPUT_DESC(y)}, {1, OUTPUT_DESC(argmax)}}; + +// MaxPoolGradWithArgmax +INPUT_MAP(MaxPoolGradWithArgmax) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(grad)}, {3, INPUT_DESC(argmax)}}; +ATTR_MAP(MaxPoolGradWithArgmax) = {{"ksize", ATTR_DESC(ksize, AnyTraits(), 
AnyTraits>())}, + {"strides", ATTR_DESC(strides, AnyTraits(), AnyTraits>())}, + {"padding", ATTR_DESC(padding, AnyTraits())}}; +OUTPUT_MAP(MaxPoolGradWithArgmax) = {{0, OUTPUT_DESC(y)}}; + +// ExtractImagePatches +INPUT_MAP(ExtractImagePatches) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(ExtractImagePatches) = {{"ksizes", ATTR_DESC(ksizes, AnyTraits(), AnyTraits>())}, + {"strides", ATTR_DESC(strides, AnyTraits(), AnyTraits>())}, + {"rates", ATTR_DESC(rates, AnyTraits(), AnyTraits>())}, + {"padding", ATTR_DESC(padding, AnyTraits())}}; +OUTPUT_MAP(ExtractImagePatches) = {{0, OUTPUT_DESC(y)}}; + +// Conv2D +INPUT_MAP(Conv2D) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(filter)}}; +ATTR_MAP(Conv2D) = { + {"stride", ATTR_DESC(strides, AnyTraits>(), AnyTraits>())}, + {"pad_list", ATTR_DESC(pads, AnyTraits>(), AnyTraits>())}, + {"dilation", ATTR_DESC(dilations, AnyTraits>(), AnyTraits>())}, + {"data_format", ATTR_DESC(data_format, AnyTraits())}, + {"group", ATTR_DESC(groups, AnyTraits())}, +}; +OUTPUT_MAP(Conv2D) = {{0, OUTPUT_DESC(y)}}; + +// Conv2DBackpropInputD +INPUT_MAP(Conv2DBackpropInputD) = {{1, INPUT_DESC(out_backprop)}, {2, INPUT_DESC(filter)}}; +INPUT_ATTR_MAP(Conv2DBackpropInputD) = { + {3, ATTR_DESC(input_size, AnyTraits>(), AnyTraits>())}}; +ATTR_MAP(Conv2DBackpropInputD) = { + {"pad_list", ATTR_DESC(pads, AnyTraits>(), AnyTraits>())}, + {"stride", ATTR_DESC(strides, "pad", AnyTraits>())}, + {"dilation", ATTR_DESC(dilations, AnyTraits>(), AnyTraits>())}, + {"data_format", ATTR_DESC(data_format, AnyTraits())}, + {"group", ATTR_DESC(groups, AnyTraits())}, +}; +OUTPUT_MAP(Conv2DBackpropInputD) = {{0, OUTPUT_DESC(y)}}; + +// Conv2DBackpropFilterD +INPUT_MAP(Conv2DBackpropFilterD) = {{1, INPUT_DESC(out_backprop)}, {2, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(Conv2DBackpropFilterD) = { + {3, ATTR_DESC(filter_size, AnyTraits>(), AnyTraits>())}}; +ATTR_MAP(Conv2DBackpropFilterD) = { + {"pad_list", ATTR_DESC(pads, AnyTraits>(), AnyTraits>())}, + {"stride", ATTR_DESC(strides, "pad", 
AnyTraits>())}, + {"dilation", ATTR_DESC(dilations, AnyTraits>(), AnyTraits>())}, + {"data_format", ATTR_DESC(data_format, AnyTraits())}, + {"group", ATTR_DESC(groups, AnyTraits())}, +}; +OUTPUT_MAP(Conv2DBackpropFilterD) = {{0, OUTPUT_DESC(y)}}; + +// DepthwiseConv2D +INPUT_MAP(DepthwiseConv2D) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(filter)}}; +ATTR_MAP(DepthwiseConv2D) = { + {"stride", ATTR_DESC(strides, AnyTraits>(), AnyTraits>())}, + {"pads", ATTR_DESC(pads, AnyTraits>(), AnyTraits>())}, + {"dilation", ATTR_DESC(dilations, AnyTraits>(), AnyTraits>())}, + {"data_format", ATTR_DESC(data_format, AnyTraits())}, +}; +OUTPUT_MAP(DepthwiseConv2D) = {{0, OUTPUT_DESC(y)}}; + +// DepthwiseConv2DBackpropInputD +INPUT_MAP(DepthwiseConv2DBackpropInputD) = {{2, INPUT_DESC(filter)}, {3, INPUT_DESC(out_backprop)}}; +INPUT_ATTR_MAP(DepthwiseConv2DBackpropInputD) = { + {1, ATTR_DESC(input_size, AnyTraits>(), AnyTraits>())}}; +ATTR_MAP(DepthwiseConv2DBackpropInputD) = { + {"stride", ATTR_DESC(strides, AnyTraits>(), AnyTraits>())}, + {"pads", ATTR_DESC(pads, AnyTraits>(), AnyTraits>())}, + {"dilation", ATTR_DESC(dilations, AnyTraits>(), AnyTraits>())}, +}; +OUTPUT_MAP(DepthwiseConv2DBackpropInputD) = {{0, OUTPUT_DESC(input_grad)}}; + +// DepthwiseConv2DBackpropFilterD +INPUT_MAP(DepthwiseConv2DBackpropFilterD) = {{1, INPUT_DESC(input)}, {3, INPUT_DESC(out_backprop)}}; +INPUT_ATTR_MAP(DepthwiseConv2DBackpropFilterD) = { + {2, ATTR_DESC(filter_size, AnyTraits>(), AnyTraits>())}}; +ATTR_MAP(DepthwiseConv2DBackpropFilterD) = { + {"stride", ATTR_DESC(strides, AnyTraits>(), AnyTraits>())}, + {"pads", ATTR_DESC(pads, AnyTraits>(), AnyTraits>())}, + {"dilation", ATTR_DESC(dilations, AnyTraits>(), AnyTraits>())}, +}; +OUTPUT_MAP(DepthwiseConv2DBackpropFilterD) = {{0, OUTPUT_DESC(filter_grad)}}; + +// MatMulV2 +INPUT_MAP(MatMulV2) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(MatMulV2) = {{"transpose_a", ATTR_DESC(transpose_x1, AnyTraits())}, + {"transpose_b", 
ATTR_DESC(transpose_x2, AnyTraits())}}; +OUTPUT_MAP(MatMulV2) = {{0, OUTPUT_DESC(y)}}; + +// Merge +INPUT_MAP(Merge) = EMPTY_INPUT_MAP; +DYN_INPUT_MAP(Merge) = {{1, DYN_INPUT_DESC(x)}}; +ATTR_MAP(Merge) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Merge) = {{0, OUTPUT_DESC(y)}, {1, OUTPUT_DESC(value_index)}}; + +// Switch +INPUT_MAP(Switch) = {{1, INPUT_DESC(data)}, {2, INPUT_DESC(pred)}}; +OUTPUT_MAP(Switch) = {{0, OUTPUT_DESC(output_false)}, {1, OUTPUT_DESC(output_true)}}; +ATTR_MAP(Switch) = EMPTY_ATTR_MAP; + +// AddN +INPUT_MAP(AddN) = EMPTY_INPUT_MAP; +DYN_INPUT_MAP(AddN) = {{1, DYN_INPUT_DESC(x)}}; +ATTR_MAP(AddN) = {{"n", ATTR_DESC(N, AnyTraits())}}; +OUTPUT_MAP(AddN) = {{0, OUTPUT_DESC(y)}}; + +// Mul +INPUT_MAP(Mul) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(Mul) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Mul) = {{0, OUTPUT_DESC(y)}}; + +// RealDiv +INPUT_MAP(RealDiv) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(RealDiv) = EMPTY_ATTR_MAP; +OUTPUT_MAP(RealDiv) = {{0, OUTPUT_DESC(y)}}; + +// Cast +INPUT_MAP(Cast) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(Cast) = {{2, ATTR_DESC(dst_type, AnyTraits())}}; +ATTR_MAP(Cast) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Cast) = {{0, OUTPUT_DESC(y)}}; + +// Case +INPUT_MAP(Case) = {{1, INPUT_DESC(branch_index)}}; +DYN_INPUT_MAP(Case) = {{2, DYN_INPUT_DESC(input)}}; +ATTR_MAP(Case) = EMPTY_ATTR_MAP; +DYN_OUTPUT_MAP(Case) = {{0, DYN_OUTPUT_DESC(output)}}; +DYN_SUBGRAPH_MAP(Case) = {{0, DYN_SUBGRAPH_DESC(branches)}}; + +// Reciprocal +INPUT_MAP(Reciprocal) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Reciprocal) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Reciprocal) = {{0, OUTPUT_DESC(y)}}; + +// Sub +INPUT_MAP(Sub) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(Sub) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Sub) = {{0, OUTPUT_DESC(y)}}; + +// SplitD +INPUT_MAP(SplitD) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(SplitD) = {{"axis", ATTR_DESC(split_dim, AnyTraits())}, + {"output_num", ATTR_DESC(num_split, AnyTraits())}}; +DYN_OUTPUT_MAP(SplitD) = {{0, DYN_OUTPUT_DESC(y)}}; + 
+// Range +INPUT_MAP(RangeD) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(RangeD) = {{"start", ATTR_DESC(start, AnyTraits())}, + {"limit", ATTR_DESC(limit, AnyTraits())}, + {"delta", ATTR_DESC(delta, AnyTraits())}}; +OUTPUT_MAP(RangeD) = {{0, OUTPUT_DESC(y)}}; + +// Neg +INPUT_MAP(Neg) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Neg) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Neg) = {{0, OUTPUT_DESC(y)}}; + +// Transpose +INPUT_MAP(TransposeD) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(TransposeD) = {{2, ATTR_DESC(perm, AnyTraits(), AnyTraits>())}}; +ATTR_MAP(TransposeD) = EMPTY_ATTR_MAP; +// Do not set Transpose operator output descriptor + +// DropOutGenMask +INPUT_MAP(DropOutGenMask) = {{1, INPUT_DESC(shape)}, {2, INPUT_DESC(prob)}}; +ATTR_MAP(DropOutGenMask) = {{"Seed0", ATTR_DESC(seed, AnyTraits())}, + {"Seed1", ATTR_DESC(seed2, AnyTraits())}}; +OUTPUT_MAP(DropOutGenMask) = {{0, OUTPUT_DESC(y)}}; + +// Pack +INPUT_MAP(Pack) = EMPTY_INPUT_MAP; +DYN_INPUT_MAP(Pack) = {{1, DYN_INPUT_DESC(x)}}; +ATTR_MAP(Pack) = {{"num", ATTR_DESC(N, AnyTraits())}, {"axis", ATTR_DESC(axis, AnyTraits())}}; +OUTPUT_MAP(Pack) = {{0, OUTPUT_DESC(y)}}; + +// ConcatD +INPUT_MAP(ConcatD) = EMPTY_INPUT_MAP; +DYN_INPUT_MAP(ConcatD) = {{1, DYN_INPUT_DESC(x)}}; +ATTR_MAP(ConcatD) = { + {"axis", ATTR_DESC(concat_dim, AnyTraits())}, + {"inputNums", ATTR_DESC(N, AnyTraits())}, +}; +OUTPUT_MAP(ConcatD) = {{0, OUTPUT_DESC(y)}}; + +// Less +INPUT_MAP(Less) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(Less) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Less) = {{0, OUTPUT_DESC(y)}}; + +// Rsqrt +INPUT_MAP(Rsqrt) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Rsqrt) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Rsqrt) = {{0, OUTPUT_DESC(y)}}; + +// Sqrt +INPUT_MAP(Sqrt) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Sqrt) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Sqrt) = {{0, OUTPUT_DESC(y)}}; + +// Square +INPUT_MAP(Square) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Square) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Square) = {{0, OUTPUT_DESC(y)}}; + +// SquareSumAll +INPUT_MAP(SquareSumAll) = {{1, 
INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(SquareSumAll) = EMPTY_ATTR_MAP; +OUTPUT_MAP(SquareSumAll) = {{0, OUTPUT_DESC(y1)}, {1, OUTPUT_DESC(y2)}}; + +// Tanh +INPUT_MAP(Tanh) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Tanh) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Tanh) = {{0, OUTPUT_DESC(y)}}; + +// TanhGrad +INPUT_MAP(TanhGrad) = {{1, INPUT_DESC(y)}, {2, INPUT_DESC(dy)}}; +ATTR_MAP(TanhGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(TanhGrad) = {{0, OUTPUT_DESC(z)}}; + +// ReduceMinD +INPUT_MAP(ReduceMinD) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(ReduceMinD) = { + {2, ATTR_DESC(axes, AnyTraits>(), AnyTraits>())}}; +ATTR_MAP(ReduceMinD) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits())}}; +OUTPUT_MAP(ReduceMinD) = {{0, OUTPUT_DESC(y)}}; + +// ReduceMaxD +INPUT_MAP(ReduceMaxD) = {{1, INPUT_DESC(x)}}; +INPUT_ATTR_MAP(ReduceMaxD) = { + {2, ATTR_DESC(axes, AnyTraits>(), AnyTraits>())}}; +ATTR_MAP(ReduceMaxD) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits())}}; +OUTPUT_MAP(ReduceMaxD) = {{0, OUTPUT_DESC(y)}}; + +// Maximum +INPUT_MAP(Maximum) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(Maximum) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Maximum) = {{0, OUTPUT_DESC(y)}}; + +// Minimum +INPUT_MAP(Minimum) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(Minimum) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Minimum) = {{0, OUTPUT_DESC(y)}}; + +// MaximumGrad +INPUT_MAP(MaximumGrad) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}, {3, INPUT_DESC(grads)}}; +ATTR_MAP(MaximumGrad) = {{"grad_x", ATTR_DESC(grad_x, AnyTraits())}, + {"grad_y", ATTR_DESC(grad_y, AnyTraits())}}; +OUTPUT_MAP(MaximumGrad) = {{0, OUTPUT_DESC(y1)}, {1, OUTPUT_DESC(y2)}}; + +// MinimumGrad +INPUT_MAP(MinimumGrad) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}, {3, INPUT_DESC(grads)}}; +ATTR_MAP(MinimumGrad) = {{"grad_x", ATTR_DESC(grad_x, AnyTraits())}, + {"grad_y", ATTR_DESC(grad_y, AnyTraits())}}; +OUTPUT_MAP(MinimumGrad) = {{0, OUTPUT_DESC(y1)}, {1, OUTPUT_DESC(y2)}}; + +// Pow +INPUT_MAP(Pow) = { + {1, INPUT_DESC(x1)}, + {2, 
INPUT_DESC(x2)}, +}; +ATTR_MAP(Pow) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Pow) = {{0, OUTPUT_DESC(y)}}; + +// Equal +INPUT_MAP(Equal) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(Equal) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Equal) = {{0, OUTPUT_DESC(y)}}; + +// NotEqual +INPUT_MAP(NotEqual) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(NotEqual) = EMPTY_ATTR_MAP; +OUTPUT_MAP(NotEqual) = {{0, OUTPUT_DESC(y)}}; + +// Log +INPUT_MAP(Log) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Log) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Log) = {{0, OUTPUT_DESC(y)}}; + +// LogicalAnd +INPUT_MAP(LogicalAnd) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(LogicalAnd) = EMPTY_ATTR_MAP; +OUTPUT_MAP(LogicalAnd) = {{0, OUTPUT_DESC(y)}}; + +// LogicalOr +INPUT_MAP(LogicalOr) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(LogicalOr) = EMPTY_ATTR_MAP; +OUTPUT_MAP(LogicalOr) = {{0, OUTPUT_DESC(y)}}; + +// LogicalNot +INPUT_MAP(LogicalNot) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(LogicalNot) = EMPTY_ATTR_MAP; +OUTPUT_MAP(LogicalNot) = {{0, OUTPUT_DESC(y)}}; + +// Greater +INPUT_MAP(Greater) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(Greater) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Greater) = {{0, OUTPUT_DESC(y)}}; + +// LogSoftmaxGrad +INPUT_MAP(LogSoftmaxGrad) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(grad)}}; +ATTR_MAP(LogSoftmaxGrad) = { + {"axis", ATTR_DESC(axis, AnyTraits>(), AnyTraits>())}}; +OUTPUT_MAP(LogSoftmaxGrad) = {{0, OUTPUT_DESC(y)}}; + +// Select +INPUT_MAP(Select) = {{1, INPUT_DESC(condition)}, {2, INPUT_DESC(x1)}, {3, INPUT_DESC(x2)}}; +ATTR_MAP(Select) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Select) = {{0, OUTPUT_DESC(y)}}; + +// LessEqual +INPUT_MAP(LessEqual) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(LessEqual) = EMPTY_ATTR_MAP; +OUTPUT_MAP(LessEqual) = {{0, OUTPUT_DESC(y)}}; + +// LogSoftmaxV2 +INPUT_MAP(LogSoftmaxV2) = {{1, INPUT_DESC(logits)}}; +ATTR_MAP(LogSoftmaxV2) = { + {"axis", ATTR_DESC(axes, AnyTraits>(), AnyTraits>())}}; +OUTPUT_MAP(LogSoftmaxV2) = {{0, 
OUTPUT_DESC(logsoftmax)}}; + +// RandomChoiceWithMask +INPUT_MAP(RandomChoiceWithMask) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(RandomChoiceWithMask) = {{"count", ATTR_DESC(count, AnyTraits())}, + {"seed", ATTR_DESC(seed, AnyTraits())}, + {"seed2", ATTR_DESC(seed2, AnyTraits())}}; +OUTPUT_MAP(RandomChoiceWithMask) = {{0, OUTPUT_DESC(y)}, {1, OUTPUT_DESC(mask)}}; + +// TruncatedNormal +INPUT_MAP(TruncatedNormal) = {{1, INPUT_DESC(shape)}}; +ATTR_MAP(TruncatedNormal) = {{"seed", ATTR_DESC(seed, AnyTraits())}, + {"seed2", ATTR_DESC(seed2, AnyTraits())}}; +OUTPUT_MAP(TruncatedNormal) = {{0, OUTPUT_DESC(y)}}; + +// StridedSliceGrad +INPUT_MAP(StridedSliceGrad) = { + {1, INPUT_DESC(dy)}, {2, INPUT_DESC(shape)}, {3, INPUT_DESC(begin)}, {4, INPUT_DESC(end)}, {5, INPUT_DESC(strides)}}; +ATTR_MAP(StridedSliceGrad) = {{"begin_mask", ATTR_DESC(begin_mask, AnyTraits())}, + {"end_mask", ATTR_DESC(end_mask, AnyTraits())}, + {"ellipsis_mask", ATTR_DESC(ellipsis_mask, AnyTraits())}, + {"new_axis_mask", ATTR_DESC(new_axis_mask, AnyTraits())}, + {"shrink_axis_mask", ATTR_DESC(shrink_axis_mask, AnyTraits())}}; +OUTPUT_MAP(StridedSliceGrad) = {{0, OUTPUT_DESC(output)}}; + +// Gelu +INPUT_MAP(Gelu) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Gelu) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Gelu) = {{0, OUTPUT_DESC(y)}}; + +// GeluGrad +INPUT_MAP(GeluGrad) = {{1, INPUT_DESC(dy)}, {2, INPUT_DESC(x)}, {3, INPUT_DESC(y)}}; +ATTR_MAP(GeluGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(GeluGrad) = {{0, OUTPUT_DESC(z)}}; + +// StridedSlice +INPUT_MAP(StridedSlice) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(begin)}, {3, INPUT_DESC(end)}, {4, INPUT_DESC(strides)}}; +ATTR_MAP(StridedSlice) = {{"begin_mask", ATTR_DESC(begin_mask, AnyTraits())}, + {"end_mask", ATTR_DESC(end_mask, AnyTraits())}, + {"ellipsis_mask", ATTR_DESC(ellipsis_mask, AnyTraits())}, + {"new_axis_mask", ATTR_DESC(new_axis_mask, AnyTraits())}, + {"shrink_axis_mask", ATTR_DESC(shrink_axis_mask, AnyTraits())}}; +OUTPUT_MAP(StridedSlice) = {{0, OUTPUT_DESC(y)}}; + +// 
UnsortedSegmentSum +INPUT_MAP(UnsortedSegmentSumD) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(segment_ids)}}; +INPUT_ATTR_MAP(UnsortedSegmentSumD) = {{3, ATTR_DESC(num_segments, AnyTraits())}}; +ATTR_MAP(UnsortedSegmentSumD) = EMPTY_ATTR_MAP; +OUTPUT_MAP(UnsortedSegmentSumD) = {{0, OUTPUT_DESC(y)}}; + +// UnsortedSegmentMin +INPUT_MAP(UnsortedSegmentMin) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(segment_ids)}, {3, INPUT_DESC(num_segments)}}; +ATTR_MAP(UnsortedSegmentMin) = EMPTY_ATTR_MAP; +OUTPUT_MAP(UnsortedSegmentMin) = {{0, OUTPUT_DESC(y)}}; + +// ExpandDims +INPUT_MAP(ExpandDims) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(axis)}}; +ATTR_MAP(ExpandDims) = EMPTY_ATTR_MAP; +OUTPUT_MAP(ExpandDims) = {{0, OUTPUT_DESC(y)}}; + +// Squeeze +INPUT_MAP(Squeeze) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Squeeze) = {{"axis", ATTR_DESC(axis, AnyTraits(), AnyTraits>())}}; +OUTPUT_MAP(Squeeze) = {{0, OUTPUT_DESC(y)}}; + +// SGD +INPUT_MAP(SGD) = {{1, INPUT_DESC(parameters)}, {2, INPUT_DESC(gradient)}, {3, INPUT_DESC(learning_rate)}, + {4, INPUT_DESC(accum)}, {5, INPUT_DESC(momentum)}, {6, INPUT_DESC(stat)}}; +ATTR_MAP(SGD) = {{"dampening", ATTR_DESC(dampening, AnyTraits())}, + {"weight_decay", ATTR_DESC(weight_decay, AnyTraits())}, + {"nesterov", ATTR_DESC(nesterov, AnyTraits())}}; +OUTPUT_MAP(SGD) = {{0, OUTPUT_DESC(parameters)}}; + +// LayerNorm +INPUT_MAP(LayerNorm) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(gamma)}, {3, INPUT_DESC(beta)}}; +ATTR_MAP(LayerNorm) = {{"begin_norm_axis", ATTR_DESC(begin_norm_axis, AnyTraits())}, + {"begin_params_axis", ATTR_DESC(begin_params_axis, AnyTraits())}, + {"epsilon", ATTR_DESC(epsilon, AnyTraits())}}; +OUTPUT_MAP(LayerNorm) = {{0, OUTPUT_DESC(y)}, {1, OUTPUT_DESC(mean)}, {2, OUTPUT_DESC(variance)}}; + +// LayerNormGrad +INPUT_MAP(LayerNormGrad) = { + {1, INPUT_DESC(x)}, {2, INPUT_DESC(dy)}, {3, INPUT_DESC(variance)}, {4, INPUT_DESC(mean)}, {5, INPUT_DESC(gamma)}}; +ATTR_MAP(LayerNormGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(LayerNormGrad) = {{0, OUTPUT_DESC(pd_x)}, 
{1, OUTPUT_DESC(pd_gamma)}, {2, OUTPUT_DESC(pd_beta)}}; + +// BatchMatMul +INPUT_MAP(BatchMatMul) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(BatchMatMul) = {{"transpose_x1", ATTR_DESC(adj_x1, AnyTraits())}, + {"transpose_x2", ATTR_DESC(adj_x2, AnyTraits())}}; +OUTPUT_MAP(BatchMatMul) = {{0, OUTPUT_DESC(y)}}; + +// DropoutDoMask +INPUT_MAP(DropOutDoMask) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(mask)}, {3, INPUT_DESC(keep_prob)}}; +ATTR_MAP(DropOutDoMask) = EMPTY_ATTR_MAP; +OUTPUT_MAP(DropOutDoMask) = {{0, OUTPUT_DESC(y)}}; + +// NPUGetFloatStatus +INPUT_MAP(NPUGetFloatStatus) = {{1, INPUT_DESC(addr)}}; +OUTPUT_MAP(NPUGetFloatStatus) = {{0, OUTPUT_DESC(data)}}; +ATTR_MAP(NPUGetFloatStatus) = EMPTY_ATTR_MAP; + +// NPUAllocFloatStatus +INPUT_MAP(NPUAllocFloatStatus) = EMPTY_INPUT_MAP; +ATTR_MAP(NPUAllocFloatStatus) = EMPTY_ATTR_MAP; +OUTPUT_MAP(NPUAllocFloatStatus) = {{0, OUTPUT_DESC(data)}}; + +// NPUClearFloatStatus +INPUT_MAP(NPUClearFloatStatus) = {{1, INPUT_DESC(addr)}}; +OUTPUT_MAP(NPUClearFloatStatus) = {{0, OUTPUT_DESC(data)}}; +ATTR_MAP(NPUClearFloatStatus) = EMPTY_ATTR_MAP; + +// Abs +INPUT_MAP(Abs) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Abs) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Abs) = {{0, OUTPUT_DESC(y)}}; + +// AbsGrad +INPUT_MAP(AbsGrad) = {{1, INPUT_DESC(y)}, {2, INPUT_DESC(dy)}}; +ATTR_MAP(AbsGrad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(AbsGrad) = {{0, OUTPUT_DESC(z)}}; + +// BinaryCrossEntropy +INPUT_MAP(BinaryCrossEntropy) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(y)}, {3, INPUT_DESC(weight)}}; +ATTR_MAP(BinaryCrossEntropy) = {{"reduction", ATTR_DESC(reduction, AnyTraits())}}; +OUTPUT_MAP(BinaryCrossEntropy) = {{0, OUTPUT_DESC(output)}}; + +// BinaryCrossEntropyGrad +INPUT_MAP(BinaryCrossEntropyGrad) = { + {1, INPUT_DESC(x)}, {2, INPUT_DESC(y)}, {3, INPUT_DESC(grad_output)}, {4, INPUT_DESC(weight)}}; +ATTR_MAP(BinaryCrossEntropyGrad) = {{"reduction", ATTR_DESC(reduction, AnyTraits())}}; +OUTPUT_MAP(BinaryCrossEntropyGrad) = {{0, OUTPUT_DESC(output)}}; + +// 
SparseApplyAdagradD +INPUT_MAP(SparseApplyAdagradD) = { + {1, INPUT_DESC(var)}, {2, INPUT_DESC(accum)}, {3, INPUT_DESC(grad)}, {4, INPUT_DESC(indices)}}; +ATTR_MAP(SparseApplyAdagradD) = {{"lr", ATTR_DESC(lr, AnyTraits())}, + {"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; +OUTPUT_MAP(SparseApplyAdagradD) = {{0, OUTPUT_DESC(var)}}; + +// ApplyProximalAdagradD +INPUT_MAP(ApplyProximalAdagradD) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(accum)}, {3, INPUT_DESC(lr)}, + {4, INPUT_DESC(l1)}, {5, INPUT_DESC(l2)}, {6, INPUT_DESC(grad)}}; +ATTR_MAP(ApplyProximalAdagradD) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; +OUTPUT_MAP(ApplyProximalAdagradD) = {{0, OUTPUT_DESC(var)}, {1, OUTPUT_DESC(accum)}}; + +// SparseApplyFtrlD +INPUT_MAP(SparseApplyFtrlD) = {{1, INPUT_DESC(var)}, + {2, INPUT_DESC(accum)}, + {3, INPUT_DESC(linear)}, + {4, INPUT_DESC(grad)}, + {5, INPUT_DESC(indices)}}; +ATTR_MAP(SparseApplyFtrlD) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}, + {"lr", ATTR_DESC(lr, AnyTraits())}, + {"l1", ATTR_DESC(l1, AnyTraits())}, + {"l2", ATTR_DESC(l2, AnyTraits())}, + {"lr_power", ATTR_DESC(lr_power, AnyTraits())}}; +OUTPUT_MAP(SparseApplyFtrlD) = {{0, OUTPUT_DESC(var)}}; + +// SpaceToDepth +INPUT_MAP(SpaceToDepth) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(SpaceToDepth) = {{"block_size", ATTR_DESC(block_size, AnyTraits())}}; +OUTPUT_MAP(SpaceToDepth) = {{0, OUTPUT_DESC(y)}}; + +// DepthToSpace +INPUT_MAP(DepthToSpace) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(DepthToSpace) = {{"block_size", ATTR_DESC(block_size, AnyTraits())}}; +OUTPUT_MAP(DepthToSpace) = {{0, OUTPUT_DESC(y)}}; + +// Sign +INPUT_MAP(Sign) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Sign) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Sign) = {{0, OUTPUT_DESC(y)}}; + +// Round +INPUT_MAP(Round) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Round) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Round) = {{0, OUTPUT_DESC(y)}}; + +// ApplyFtrlD +INPUT_MAP(ApplyFtrlD) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(accum)}, {3, INPUT_DESC(linear)}, + {4, 
INPUT_DESC(grad)}, {5, INPUT_DESC(lr)}, {6, INPUT_DESC(l1)}, + {7, INPUT_DESC(l2)}, {8, INPUT_DESC(lr_power)}}; +ATTR_MAP(ApplyFtrlD) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; +OUTPUT_MAP(ApplyFtrlD) = {{0, OUTPUT_DESC(var)}, {1, OUTPUT_DESC(accum)}, {2, OUTPUT_DESC(linear)}}; + +// Diag +INPUT_MAP(Diag) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(Diag) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Diag) = {{0, OUTPUT_DESC(y)}}; + +// DiagPart +INPUT_MAP(DiagPart) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(DiagPart) = EMPTY_ATTR_MAP; +OUTPUT_MAP(DiagPart) = {{0, OUTPUT_DESC(y)}}; + +// SpaceToBatchD +INPUT_MAP(SpaceToBatchD) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(SpaceToBatchD) = { + {"block_size", ATTR_DESC(block_size, AnyTraits())}, + {"paddings", ATTR_DESC(paddings, AnyTraits>>(), AnyTraits>())}}; +OUTPUT_MAP(SpaceToBatchD) = {{0, OUTPUT_DESC(y)}}; + +// BatchToSpaceD +INPUT_MAP(BatchToSpaceD) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(BatchToSpaceD) = { + {"block_size", ATTR_DESC(block_size, AnyTraits())}, + {"crops", ATTR_DESC(crops, AnyTraits>>(), AnyTraits>())}}; +OUTPUT_MAP(BatchToSpaceD) = {{0, OUTPUT_DESC(y)}}; + +// Atan2 +INPUT_MAP(Atan2) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; +ATTR_MAP(Atan2) = EMPTY_ATTR_MAP; +OUTPUT_MAP(Atan2) = {{0, OUTPUT_DESC(y)}}; + +// ApplyRMSPropD +INPUT_MAP(ApplyRMSPropD) = { + {1, INPUT_DESC(var)}, {2, INPUT_DESC(ms)}, {3, INPUT_DESC(mom)}, {4, INPUT_DESC(lr)}, {5, INPUT_DESC(grad)}}; +INPUT_ATTR_MAP(ApplyRMSPropD) = {{6, ATTR_DESC(rho, AnyTraits())}, + {7, ATTR_DESC(momentum, AnyTraits())}, + {8, ATTR_DESC(epsilon, AnyTraits())}}; +ATTR_MAP(ApplyRMSPropD) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; +OUTPUT_MAP(ApplyRMSPropD) = {{0, OUTPUT_DESC(var)}}; + +// ApplyCenteredRMSProp +INPUT_MAP(ApplyCenteredRMSProp) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(mg)}, {3, INPUT_DESC(ms)}, + {4, INPUT_DESC(mom)}, {5, INPUT_DESC(grad)}, {6, INPUT_DESC(lr)}, + {7, INPUT_DESC(rho)}, {8, INPUT_DESC(momentum)}, {9, INPUT_DESC(epsilon)}}; 
+ATTR_MAP(ApplyCenteredRMSProp) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())}}; +OUTPUT_MAP(ApplyCenteredRMSProp) = {{0, OUTPUT_DESC(var)}}; + +// L2Loss +INPUT_MAP(L2Loss) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(L2Loss) = EMPTY_ATTR_MAP; +OUTPUT_MAP(L2Loss) = {{0, OUTPUT_DESC(y)}}; + +// CTCLoss +INPUT_MAP(CTCLoss) = {{1, INPUT_DESC(inputs)}, + {2, INPUT_DESC(labels_indices)}, + {3, INPUT_DESC(labels_values)}, + {4, INPUT_DESC(sequence_length)}}; +ATTR_MAP(CTCLoss) = { + {"preprocess_collapse_repeated", ATTR_DESC(preprocess_collapse_repeated, AnyTraits())}, + {"ctc_merge_repeated", ATTR_DESC(ctc_merge_repeated, AnyTraits())}, + {"ignore_longer_outputs_than_inputs", ATTR_DESC(ignore_longer_outputs_than_inputs, AnyTraits())}}; +OUTPUT_MAP(CTCLoss) = {{0, OUTPUT_DESC(loss)}, {1, OUTPUT_DESC(gradient)}}; + +// AscendQuant +INPUT_MAP(AscendQuant) = {{1, INPUT_DESC(x)}}; +ATTR_MAP(AscendQuant) = {{"scale", ATTR_DESC(scale, AnyTraits())}, + {"offset", ATTR_DESC(offset, AnyTraits())}, + {"sqrt_mode", ATTR_DESC(sqrt_mode, AnyTraits())}, + {"round_mode", ATTR_DESC(round_mode, AnyTraits())}}; +OUTPUT_MAP(AscendQuant) = {{0, OUTPUT_DESC(y)}}; + +// AscendDequant +INPUT_MAP(AscendDequant) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(deq_scale)}}; +ATTR_MAP(AscendDequant) = {{"sqrt_mode", ATTR_DESC(sqrt_mode, AnyTraits())}, + {"relu_flag", ATTR_DESC(relu_flag, AnyTraits())}}; +OUTPUT_MAP(AscendDequant) = {{0, OUTPUT_DESC(y)}}; +#ifdef ENABLE_GE +// Print +INPUT_MAP(Print) = EMPTY_INPUT_MAP; +DYN_INPUT_MAP(Print) = {{1, DYN_INPUT_DESC(x)}}; +ATTR_MAP(Print) = EMPTY_ATTR_MAP; +#endif +} // namespace transform +} // namespace mindspore diff --git a/mindspore/ccsrc/transform/op_declare.h b/mindspore/ccsrc/transform/graph_ir/op_declare.h similarity index 97% rename from mindspore/ccsrc/transform/op_declare.h rename to mindspore/ccsrc/transform/graph_ir/op_declare.h index baa819f71fa..e493ea0e528 100755 --- a/mindspore/ccsrc/transform/op_declare.h +++ 
b/mindspore/ccsrc/transform/graph_ir/op_declare.h @@ -19,7 +19,7 @@ #include #include -#include "transform/op_adapter.h" +#include "transform/graph_ir/op_adapter.h" namespace mindspore { namespace transform { @@ -46,6 +46,10 @@ namespace transform { template <> \ const std::unordered_map OpAdapter::dyn_input_map_; +#define DECLARE_OP_USE_DYN_SUBGRAPH(T) \ + template <> \ + const std::unordered_map OpAdapter::dyn_subgraph_map_; + #define DECLARE_OP_USE_DYN_OUTPUT(T) \ template <> \ const std::unordered_map OpAdapter::dyn_output_map_; @@ -235,6 +239,10 @@ DECLARE_OP_USE_OUTPUT(RealDiv) DECLARE_OP_ADAPTER(Cast) DECLARE_OP_USE_INPUT_ATTR(Cast) DECLARE_OP_USE_OUTPUT(Cast) +DECLARE_OP_ADAPTER(Case) +DECLARE_OP_USE_DYN_INPUT(Case) +DECLARE_OP_USE_DYN_SUBGRAPH(Case) +DECLARE_OP_USE_DYN_OUTPUT(Case) DECLARE_OP_ADAPTER(Reciprocal) DECLARE_OP_USE_OUTPUT(Reciprocal) DECLARE_OP_ADAPTER(Neg) @@ -313,8 +321,8 @@ DECLARE_OP_ADAPTER(NPUAllocFloatStatus) DECLARE_OP_USE_OUTPUT(NPUAllocFloatStatus) DECLARE_OP_ADAPTER(NPUClearFloatStatus) DECLARE_OP_USE_OUTPUT(NPUClearFloatStatus) -DECLARE_OP_ADAPTER(MatMul) -DECLARE_OP_USE_OUTPUT(MatMul) +DECLARE_OP_ADAPTER(MatMulV2) +DECLARE_OP_USE_OUTPUT(MatMulV2) DECLARE_OP_ADAPTER(SoftmaxCrossEntropyWithLogits) DECLARE_OP_USE_OUTPUT(SoftmaxCrossEntropyWithLogits) diff --git a/mindspore/ccsrc/transform/types.h b/mindspore/ccsrc/transform/graph_ir/types.h similarity index 100% rename from mindspore/ccsrc/transform/types.h rename to mindspore/ccsrc/transform/graph_ir/types.h diff --git a/mindspore/ccsrc/transform/util.cc b/mindspore/ccsrc/transform/graph_ir/util.cc similarity index 99% rename from mindspore/ccsrc/transform/util.cc rename to mindspore/ccsrc/transform/graph_ir/util.cc index b848ec117b7..6ae665d69ff 100644 --- a/mindspore/ccsrc/transform/util.cc +++ b/mindspore/ccsrc/transform/graph_ir/util.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "transform/util.h" +#include "transform/graph_ir/util.h" #include #include diff --git a/mindspore/ccsrc/transform/util.h b/mindspore/ccsrc/transform/graph_ir/util.h similarity index 99% rename from mindspore/ccsrc/transform/util.h rename to mindspore/ccsrc/transform/graph_ir/util.h index 5d8db26ad1b..32d4242c4f8 100644 --- a/mindspore/ccsrc/transform/util.h +++ b/mindspore/ccsrc/transform/graph_ir/util.h @@ -25,7 +25,7 @@ #include "ir/anf.h" #include "ir/dtype.h" #include "ir/tensor.h" -#include "transform/types.h" +#include "transform/graph_ir/types.h" #include "graph/tensor.h" diff --git a/mindspore/ccsrc/onnx/CMakeLists.txt b/mindspore/ccsrc/transform/onnx/CMakeLists.txt similarity index 72% rename from mindspore/ccsrc/onnx/CMakeLists.txt rename to mindspore/ccsrc/transform/onnx/CMakeLists.txt index a65ea6d450a..0d2f6c947bc 100644 --- a/mindspore/ccsrc/onnx/CMakeLists.txt +++ b/mindspore/ccsrc/transform/onnx/CMakeLists.txt @@ -1,3 +1,3 @@ file(GLOB_RECURSE _ONNX_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_ONNX_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ONNX) -add_library(_mindspore_onnx_obj OBJECT ${_ONNX_SRC_FILES}) +add_library(_mindspore_transform_onnx_obj OBJECT ${_ONNX_SRC_FILES}) diff --git a/mindspore/ccsrc/onnx/ir_exporter.cc b/mindspore/ccsrc/transform/onnx/ir_exporter.cc similarity index 98% rename from mindspore/ccsrc/onnx/ir_exporter.cc rename to mindspore/ccsrc/transform/onnx/ir_exporter.cc index 2f02f483f57..78858eea8ad 100644 --- a/mindspore/ccsrc/onnx/ir_exporter.cc +++ b/mindspore/ccsrc/transform/onnx/ir_exporter.cc @@ -23,10 +23,10 @@ #include #include -#include "ir/tensor_py.h" -#include "ir/param_value_py.h" +#include "ir/tensor.h" +#include "ir/param_value.h" #include "debug/anf_ir_utils.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "proto/onnx.pb.h" namespace mindspore { @@ -187,13 +187,9 @@ void 
IrExportBuilder::BuildParameters(const FuncGraphPtr &func_graph, onnx::Grap onnx::TensorProto *initializer_proto = graph_proto->add_initializer(); initializer_proto->set_name(param_name); SetParamToTensorProto(param, initializer_proto); - auto param_value = std::dynamic_pointer_cast(param->default_param()); - py::object obj = param_value->value(); - py::object data = obj.attr("data"); - if (py::isinstance(data)) { - auto method = data.attr("asnumpy"); - py::array npy_data = method(); - initializer_proto->set_raw_data(npy_data.request(true).ptr, static_cast(npy_data.nbytes())); + auto tensor = std::dynamic_pointer_cast(param->default_param()->value()); + if (tensor) { + initializer_proto->set_raw_data(tensor->data_c(), tensor->data().nbytes()); } } } diff --git a/mindspore/ccsrc/onnx/onnx_exporter.cc b/mindspore/ccsrc/transform/onnx/onnx_exporter.cc similarity index 99% rename from mindspore/ccsrc/onnx/onnx_exporter.cc rename to mindspore/ccsrc/transform/onnx/onnx_exporter.cc index 65a841246bd..f69fb81a7e1 100644 --- a/mindspore/ccsrc/onnx/onnx_exporter.cc +++ b/mindspore/ccsrc/transform/onnx/onnx_exporter.cc @@ -25,9 +25,9 @@ #include "debug/anf_ir_utils.h" #include "proto/onnx.pb.h" -#include "operator/ops.h" -#include "ir/param_value_py.h" -#include "ir/tensor_py.h" +#include "frontend/operator/ops.h" +#include "ir/tensor.h" +#include "ir/param_value.h" namespace mindspore { enum OpMergeMode { @@ -449,13 +449,9 @@ void OnnxExporter::ExportParameters(const FuncGraphPtr &func_graph, onnx::GraphP initializer_proto->set_name(param_ptr->ToString()); SetTensorProtoInfo(param_ptr, initializer_proto); // set value for initializer - auto param_value = std::dynamic_pointer_cast(param_ptr->default_param()); - py::object obj = param_value->value(); - py::object data = obj.attr("data"); - if (py::isinstance(data)) { - auto method = data.attr("asnumpy"); - py::array npy_data = method(); - initializer_proto->set_raw_data(npy_data.request(true).ptr, 
static_cast(npy_data.nbytes())); + auto tensor = std::dynamic_pointer_cast(param_ptr->default_param()->value()); + if (tensor) { + initializer_proto->set_raw_data(tensor->data_c(), tensor->data().nbytes()); } } } diff --git a/mindspore/ccsrc/utils/anf_ir.proto b/mindspore/ccsrc/utils/anf_ir.proto index 145751e7f06..2ea0511fa89 100644 --- a/mindspore/ccsrc/utils/anf_ir.proto +++ b/mindspore/ccsrc/utils/anf_ir.proto @@ -227,6 +227,9 @@ message NodeProto { // other fields for debug optional uint64 output_i = 7; + + // The full_name_with_scope of CNode + optional string full_name = 8; } // Models diff --git a/mindspore/ccsrc/utils/callbacks.cc b/mindspore/ccsrc/utils/callbacks.cc index 427cc5e568c..ceb95d5c8c0 100644 --- a/mindspore/ccsrc/utils/callbacks.cc +++ b/mindspore/ccsrc/utils/callbacks.cc @@ -20,8 +20,8 @@ #include #include #include "pybind11/pybind11.h" -#include "pipeline/parse/data_converter.h" -#include "pipeline/parse/python_adapter.h" +#include "pipeline/jit/parse/data_converter.h" +#include "pipeline/jit/parse/python_adapter.h" #include "utils/visible.h" namespace mindspore { diff --git a/mindspore/ccsrc/utils/callbacks_ge.cc b/mindspore/ccsrc/utils/callbacks_ge.cc index 3174ec4b151..6001b295ad3 100644 --- a/mindspore/ccsrc/utils/callbacks_ge.cc +++ b/mindspore/ccsrc/utils/callbacks_ge.cc @@ -16,11 +16,11 @@ #include "utils/callbacks_ge.h" #include "pybind11/pybind11.h" -#include "ir/param_value_py.h" -#include "transform/df_graph_manager.h" -#include "transform/util.h" -#include "pipeline/parse/data_converter.h" -#include "pipeline/parse/python_adapter.h" +#include "ir/param_value.h" +#include "transform/graph_ir/df_graph_manager.h" +#include "transform/graph_ir/util.h" +#include "pipeline/jit/parse/data_converter.h" +#include "pipeline/jit/parse/python_adapter.h" #include "utils/visible.h" namespace mindspore { @@ -50,13 +50,10 @@ bool GetParameterShape(const FuncGraphPtr &graph, const std::string ¶m_name, return false; } if (param_node->name() == 
param_name) { - py::object parameter; + TensorPtr tensor; if (param_node->has_default()) { - auto param_value = std::dynamic_pointer_cast(param_node->default_param()); - parameter = param_value->value(); + tensor = std::dynamic_pointer_cast(param_node->default_param()->value()); } - ValuePtr value = parse::data_converter::PyDataToValue(parameter); - TensorPtr tensor = std::dynamic_pointer_cast(value); if (tensor == nullptr) { shape->push_back(ONE_SHAPE); } else { diff --git a/mindspore/ccsrc/utils/callbacks_ge.h b/mindspore/ccsrc/utils/callbacks_ge.h index 9735c3000aa..f0ef583aaa9 100644 --- a/mindspore/ccsrc/utils/callbacks_ge.h +++ b/mindspore/ccsrc/utils/callbacks_ge.h @@ -20,8 +20,8 @@ #include #include #include -#include "transform/types.h" -#include "transform/util.h" +#include "transform/graph_ir/types.h" +#include "transform/graph_ir/util.h" #include "ir/tensor.h" namespace mindspore { diff --git a/mindspore/ccsrc/utils/comm_manager.cc b/mindspore/ccsrc/utils/comm_manager.cc index 70adfb7467f..de165c4aac6 100644 --- a/mindspore/ccsrc/utils/comm_manager.cc +++ b/mindspore/ccsrc/utils/comm_manager.cc @@ -16,17 +16,27 @@ #include "utils/comm_manager.h" #include "utils/convert_utils.h" + #ifndef NO_DLIB #include "hccl/hcom.h" #endif +#if defined(ENABLE_GPU) +#include "runtime/device/gpu/distribution/collective_init.h" +using CollectiveInitializer = mindspore::device::gpu::CollectiveInitializer; +using CreateCommGroupFunc = mindspore::device::gpu::CreateCommGroupFunc; +using GetRankIDByGroupFunc = mindspore::device::gpu::GetRankIDByGroupFunc; +using GetGroupSizeFunc = mindspore::device::gpu::GetGroupSizeFunc; +using DestroyGroupFunc = mindspore::device::gpu::DestroyGroupFunc; +#endif + namespace mindspore { +#ifndef NO_DLIB CommManager &CommManager::GetInstance() noexcept { static CommManager instance("hccl"); return instance; } -#ifndef NO_DLIB #define HCCL_RUN_CHECK(op_name, group, op) \ do { \ auto hccl_result = (op); \ @@ -79,7 +89,79 @@ bool 
CommManager::DestroyGroup(const string &group) const { HCCL_RUN_CHECK(string("destroy communicate group"), group, hcom_destroy_group(group.c_str())); return true; } +#elif defined(ENABLE_GPU) +CommManager &CommManager::GetInstance() noexcept { + static CommManager instance("nccl"); + return instance; +} + +bool CommManager::CreateGroupSync(const string &group, const vector &rank_id_list) const { + const void *collective_handle_ = CollectiveInitializer::instance().collective_handle(); + if (!collective_handle_) { + MS_LOG(EXCEPTION) << "GPU collective handle is not initialized."; + } + MS_LOG(INFO) << "Create communication group " << group << " by rank id list " << rank_id_list; + auto create_comm_group_funcptr = + reinterpret_cast(dlsym(const_cast(collective_handle_), "CreateCommGroup")); + MS_EXCEPTION_IF_NULL(create_comm_group_funcptr); + bool ret = (*create_comm_group_funcptr)(group, rank_id_list); + if (!ret) { + MS_LOG(ERROR) << "Creating group " << group << "for rank id list" << rank_id_list << "failed."; + return ret; + } + return ret; +} + +bool CommManager::GetRankID(const string &group, unsigned int *rank_id) const { + const void *collective_handle_ = CollectiveInitializer::instance().collective_handle(); + if (!collective_handle_) { + MS_LOG(EXCEPTION) << "GPU collective handle is not initialized."; + } + auto get_rank_id_funcptr = + reinterpret_cast(dlsym(const_cast(collective_handle_), "GetRankIDByGroup")); + MS_EXCEPTION_IF_NULL(get_rank_id_funcptr); + int rank = (*get_rank_id_funcptr)(group); + *rank_id = static_cast(rank); + MS_LOG(INFO) << "This process rank id is " << *rank_id << " in group " << group; + return true; +} + +bool CommManager::GetRankSize(const string &group, unsigned int *rank_size) const { + const void *collective_handle_ = CollectiveInitializer::instance().collective_handle(); + if (!collective_handle_) { + MS_LOG(EXCEPTION) << "GPU collective handle is not initialized."; + } + auto get_group_size_funcptr = + 
reinterpret_cast(dlsym(const_cast(collective_handle_), "GetGroupSize")); + MS_EXCEPTION_IF_NULL(get_group_size_funcptr); + int size = (*get_group_size_funcptr)(group); + *rank_size = static_cast(size); + MS_LOG(INFO) << "Group " << group << " size is " << *rank_size; + return true; +} + +bool CommManager::DestroyGroup(const string &group) const { + const void *collective_handle_ = CollectiveInitializer::instance().collective_handle(); + if (!collective_handle_) { + MS_LOG(EXCEPTION) << "GPU collective handle is not initialized."; + } + auto destroy_group_funcptr = + reinterpret_cast(dlsym(const_cast(collective_handle_), "DestroyGroup")); + MS_EXCEPTION_IF_NULL(destroy_group_funcptr); + + bool ret = (*destroy_group_funcptr)(group); + if (!ret) { + MS_LOG(ERROR) << "Destroying group " << group << " failed."; + return ret; + } + return ret; +} #else +CommManager &CommManager::GetInstance() noexcept { + static CommManager instance("hccl"); + return instance; +} + bool CommManager::CreateGroupSync(const string &, const vector &) const { return true; } bool CommManager::GetRankID(const string &group, unsigned int *rank_id) const { return true; } diff --git a/mindspore/ccsrc/utils/context/ms_context.cc b/mindspore/ccsrc/utils/context/ms_context.cc index 2f2471f4600..d6381ec7e82 100644 --- a/mindspore/ccsrc/utils/context/ms_context.cc +++ b/mindspore/ccsrc/utils/context/ms_context.cc @@ -27,9 +27,10 @@ #include "tdt/data_common.h" #endif #ifdef ENABLE_GE -#include "transform/df_graph_manager.h" +#include "transform/graph_ir/df_graph_manager.h" #endif #include "ir/tensor.h" +#include "common/utils.h" namespace mindspore { #ifdef ENABLE_GE @@ -89,7 +90,7 @@ MsContext::MsContext(const std::string &policy, const std::string &target) { max_device_memory_ = kDefaultMaxDeviceMemory; print_file_path_ = ""; enable_graph_kernel_ = false; - enable_sparse_flag_ = false; + enable_sparse_ = false; } std::shared_ptr MsContext::GetInstance() { @@ -168,6 +169,11 @@ bool 
MsContext::OpenTsd() { return true; } + auto role = common::GetEnv("MS_ROLE"); + if (strcmp(role.c_str(), "MS_SCHED") == 0 || strcmp(role.c_str(), "MS_PSERVER") == 0) { + return true; + } + unsigned int device_id; unsigned int rank_size = 1; diff --git a/mindspore/ccsrc/utils/context/ms_context.h b/mindspore/ccsrc/utils/context/ms_context.h index 3bca16f8ee3..19205cccb83 100644 --- a/mindspore/ccsrc/utils/context/ms_context.h +++ b/mindspore/ccsrc/utils/context/ms_context.h @@ -161,8 +161,8 @@ class MsContext { void set_enable_graph_kernel(bool enable_graph_kernel) { enable_graph_kernel_ = enable_graph_kernel; } bool enable_graph_kernel() const { return enable_graph_kernel_; } - bool enable_sparse_flag() const { return enable_sparse_flag_; } - void set_enable_sparse_flag(bool enable_sparse_flag) { enable_sparse_flag_ = enable_sparse_flag; } + bool enable_sparse() const { return enable_sparse_; } + void set_enable_sparse(bool enable_sparse) { enable_sparse_ = enable_sparse; } private: MsContext(const std::string &backend_policy, const std::string &target); @@ -207,7 +207,7 @@ class MsContext { float max_device_memory_; std::string print_file_path_; bool enable_graph_kernel_; - bool enable_sparse_flag_; + bool enable_sparse_; }; } // namespace mindspore diff --git a/mindspore/ccsrc/utils/convert_utils.cc b/mindspore/ccsrc/utils/convert_utils.cc index 8cb071b7699..b1847d1df53 100644 --- a/mindspore/ccsrc/utils/convert_utils.cc +++ b/mindspore/ccsrc/utils/convert_utils.cc @@ -25,12 +25,12 @@ #include #include "pybind11/pybind11.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pipeline/parse/parse.h" -#include "pipeline/parse/parse_base.h" +#include "abstract/abstract_value.h" +#include "pipeline/jit/parse/parse.h" +#include "pipeline/jit/parse/parse_base.h" #include "ir/value.h" #include "ir/tensor.h" -#include "ir/param_value_py.h" +#include "ir/param_value.h" #include "utils/base_ref_extends.h" namespace mindspore { @@ -230,6 +230,20 @@ bool 
ValueToBool(const ValuePtr &v, bool *value) { return true; } +bool BaseRefToInt(const ValuePtr &v, int *value) { + MS_EXCEPTION_IF_NULL(v); + if (v->isa()) { + auto tensor = v->cast(); + (void)tensor->data_sync(); + int *tensor_data = static_cast(tensor->data_c()); + auto vb = tensor_data[0]; + *value = vb; + return true; + } + MS_LOG(ERROR) << "Index must be tensor type."; + return false; +} + bool BaseRefToBool(const BaseRef &v, bool *value) { if (utils::isa(v)) { return ValueToBool(utils::cast(v), value); @@ -435,8 +449,8 @@ bool IsGraphOutputValueNodeOrParameter(const AnfNodePtr &output, const py::tuple if (!param->has_default()) { MS_LOG(EXCEPTION) << "Can not determine value of Parameter " << index << " (" << param->name() << ")"; } - auto param_value = std::dynamic_pointer_cast(param->default_param()); - *ret_val = param_value->value().attr("data"); + auto tensor = param->default_param()->value(); + *ret_val = py::cast(tensor); } return true; } diff --git a/mindspore/ccsrc/utils/convert_utils.h b/mindspore/ccsrc/utils/convert_utils.h index 40c3e88c5c4..d4ecbf4408d 100644 --- a/mindspore/ccsrc/utils/convert_utils.h +++ b/mindspore/ccsrc/utils/convert_utils.h @@ -28,7 +28,7 @@ #include "utils/convert_utils_base.h" #include "utils/any.h" #include "utils/base_ref.h" -#include "ir/base.h" +#include "base/base.h" #include "ir/anf.h" namespace py = pybind11; @@ -42,6 +42,7 @@ using TensorPtr = std::shared_ptr; py::object AnyToPyData(const Any &value); py::object BaseRefToPyData(const BaseRef &value); bool BaseRefToBool(const BaseRef &in, bool *out); +bool BaseRefToInt(const ValuePtr &v, int *value); bool ValueToBool(const ValuePtr &in, bool *out); py::object ValuePtrToPyData(const ValuePtr &value); diff --git a/mindspore/ccsrc/utils/graph_utils.h b/mindspore/ccsrc/utils/graph_utils.h index 93edda3e34f..2a9240ac849 100644 --- a/mindspore/ccsrc/utils/graph_utils.h +++ b/mindspore/ccsrc/utils/graph_utils.h @@ -29,7 +29,7 @@ #include #include "ir/anf.h" -#include 
"ir/primitive_base.h" +#include "ir/primitive.h" #include "ir/scalar.h" #include "ir/tensor.h" #include "debug/label.h" diff --git a/mindspore/ccsrc/utils/graph_utils_extends.cc b/mindspore/ccsrc/utils/graph_utils_extends.cc index 0740c242363..852dd0e3f29 100644 --- a/mindspore/ccsrc/utils/graph_utils_extends.cc +++ b/mindspore/ccsrc/utils/graph_utils_extends.cc @@ -31,8 +31,8 @@ #include "debug/label.h" #include "utils/log_adapter.h" #include "common/utils.h" -#include "pipeline/parse/function_block.h" -#include "pipeline/parse/python_adapter.h" +#include "pipeline/jit/parse/function_block.h" +#include "pipeline/jit/parse/python_adapter.h" namespace mindspore { namespace { diff --git a/mindspore/ccsrc/utils/load_onnx/anf_converter.cc b/mindspore/ccsrc/utils/load_onnx/anf_converter.cc index ad87d6ae8fb..9e8e51a46b4 100644 --- a/mindspore/ccsrc/utils/load_onnx/anf_converter.cc +++ b/mindspore/ccsrc/utils/load_onnx/anf_converter.cc @@ -60,6 +60,9 @@ int AnfConverter::ValidateFileStr(const std::string &modelFile, std::string file bool AnfConverter::ReadOnnxFromBinary(const std::string &modelFile, google::protobuf::Message *onnx_model) { std::unique_ptr onnx_file(new (std::nothrow) char[PATH_MAX]{0}); int fd = open(onnx_file.get(), O_RDONLY); + if (fd < 0) { + MS_LOG(EXCEPTION) << "failed to open file"; + } google::protobuf::io::FileInputStream input(fd); google::protobuf::io::CodedInputStream code_input(&input); code_input.SetTotalBytesLimit(INT_MAX, 536870912); @@ -85,7 +88,7 @@ std::shared_ptr AnfConverter::RunAnfConverter(const std::string &file MS_LOG(ERROR) << "Trans data not support input format!"; } else { modelFile = flagItem.substr(pos + 1); - std::cout << "input protobuf file path is: " << flagItem.substr(pos + 1) << std::endl; + std::cout << "input protobuf file path is: " << modelFile << std::endl; } if (ValidateFileStr(modelFile, ".pb") != 0) { diff --git a/mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc 
b/mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc index c3dfa5194f3..fa1137e3f6c 100644 --- a/mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc +++ b/mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc @@ -22,14 +22,12 @@ #include #include "google/protobuf/io/zero_copy_stream_impl.h" #include "ir/tensor.h" -#include "ir/tensor_py.h" -#include "ir/param_value_py.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "ir/param_value.h" +#include "frontend/operator/ops.h" +#include "abstract/abstract_value.h" #include "proto/onnx.pb.h" #include "utils/log_adapter.h" -using mindspore::tensor::TensorPy; using std::string; namespace mindspore { @@ -121,13 +119,15 @@ bool MSANFModelParser::BuildParameterForFuncGraph(const ParameterPtr &node, cons std::string initial_data = initialize_proto.raw_data(); auto *tensor_data_buf = reinterpret_cast(tensor_info->data_c()); MS_EXCEPTION_IF_NULL(tensor_data_buf); - memcpy_s(tensor_data_buf, tensor_info->data().nbytes(), initial_data.data(), initial_data.size()); + auto ret = memcpy_s(tensor_data_buf, tensor_info->data().nbytes(), initial_data.data(), initial_data.size()); + if (ret != 0) { + MS_LOG(EXCEPTION) << "memcpy_s error, errorno" << ret; + } - py::array array_data = TensorPy::AsNumpy(*tensor_info); - ParamValuePyPtr para_value_ptr = std::make_shared(); - MS_EXCEPTION_IF_NULL(para_value_ptr); - para_value_ptr->set_value(array_data); - node->set_default_param(para_value_ptr); + auto param_value = std::make_shared(); + MS_EXCEPTION_IF_NULL(param_value); + param_value->set_value(tensor_info); + node->set_default_param(param_value); } anfnode_build_map_[value_proto.name()] = node; return true; @@ -252,7 +252,11 @@ bool MSANFModelParser::ObtainValueNodeInTensorForm(const std::string &value_node tensor::TensorPtr tensor_info = std::make_shared(kDefaultValueSwitchMap[attr_tensor_type], shape); const std::string &tensor_buf = attr_tensor.raw_data(); auto *tensor_data_buf = 
reinterpret_cast(tensor_info->data_c()); - memcpy_s(tensor_data_buf, tensor_info->data().nbytes(), tensor_buf.data(), tensor_buf.size()); + auto ret = memcpy_s(tensor_data_buf, tensor_info->data().nbytes(), tensor_buf.data(), tensor_buf.size()); + if (ret != 0) { + MS_LOG(EXCEPTION) << "memcpy_s error, errorno" << ret; + } + auto new_value_node = NewValueNode(MakeValue(tensor_info)); MS_EXCEPTION_IF_NULL(new_value_node); auto tensor_abstract = tensor_info->ToAbstract(); @@ -339,7 +343,6 @@ bool MSANFModelParser::GetAttrValueForValueNode(const std::string &ref_attr_name MS_LOG(ERROR) << "parse ValueNode value don't support input of ref_attr_name"; return false; } - return true; } bool MSANFModelParser::BuildValueNodeForFuncGraph(const onnx::NodeProto &node_proto) { diff --git a/mindspore/ccsrc/utils/load_onnx/anf_model_parser.h b/mindspore/ccsrc/utils/load_onnx/anf_model_parser.h index 11b9cd101f8..58fbd1bc707 100644 --- a/mindspore/ccsrc/utils/load_onnx/anf_model_parser.h +++ b/mindspore/ccsrc/utils/load_onnx/anf_model_parser.h @@ -32,7 +32,7 @@ using uint64 = uint64_t; using float16 = Eigen::half; class MSANFModelParser { public: - MSANFModelParser() = default; + MSANFModelParser() : producer_name_(""), model_version_(0), ir_version_(0) {} ~MSANFModelParser() = default; FuncGraphPtr Parse(const onnx::ModelProto &model_proto); diff --git a/mindspore/ccsrc/utils/log_adapter.cc b/mindspore/ccsrc/utils/log_adapter.cc index 3588754dae1..702deefcb4b 100644 --- a/mindspore/ccsrc/utils/log_adapter.cc +++ b/mindspore/ccsrc/utils/log_adapter.cc @@ -18,7 +18,6 @@ #include #include -#include "pybind11/pybind11.h" #include "debug/trace.h" // namespace to support utils module definition @@ -158,6 +157,7 @@ static std::string ExceptionTypeToString(ExceptionType type) { static const char *GetSubModuleName(SubModuleId module_id) { static const char *sub_module_names[NUM_SUBMODUES] = { "UNKNOWN", // SM_UNKNOWN + "BASE", // SM_BASE "ANALYZER", // SM_ANALYZER "COMMON", // SM_COMMON 
"DEBUG", // SM_DEBUG @@ -176,7 +176,8 @@ static const char *GetSubModuleName(SubModuleId module_id) { "PYNATIVE", // SM_PYNATIVE "SESSION", // SM_SESSION "UTILS", // SM_UTILS - "VM" // SM_VM + "VM", // SM_VM + "ABSTRACT" // SM_ABSTRACT }; return sub_module_names[module_id % NUM_SUBMODUES]; @@ -219,16 +220,10 @@ void LogWriter::operator^(const LogStream &stream) const { trace::TraceGraphEval(); trace::GetEvalStackInfo(oss); - if (exception_type_ == IndexError) { - throw pybind11::index_error(oss.str()); + if (exception_handler_ != nullptr) { + exception_handler_(exception_type_, oss.str()); } - if (exception_type_ == ValueError) { - throw pybind11::value_error(oss.str()); - } - if (exception_type_ == TypeError) { - throw pybind11::type_error(oss.str()); - } - pybind11::pybind11_fail(oss.str()); + throw std::runtime_error(oss.str()); } static std::string GetEnv(const std::string &envvar) { diff --git a/mindspore/ccsrc/utils/log_adapter.h b/mindspore/ccsrc/utils/log_adapter.h index dfd463ee1d7..a0e9bfc6d6b 100644 --- a/mindspore/ccsrc/utils/log_adapter.h +++ b/mindspore/ccsrc/utils/log_adapter.h @@ -22,6 +22,7 @@ #include #include #include +#include #include "./overload.h" #include "./securec.h" #ifdef USE_GLOG @@ -99,6 +100,7 @@ enum MsLogLevel : int { DEBUG = 0, INFO, WARNING, ERROR, EXCEPTION }; enum SubModuleId : int { SM_UNKNOWN = 0, // unknown submodule + SM_BASE, // base SM_ANALYZER, // static analyzer SM_COMMON, // common SM_DEBUG, // debug @@ -118,6 +120,7 @@ enum SubModuleId : int { SM_SESSION, // session SM_UTILS, // utils SM_VM, // VM + SM_ABSTRACT, // abstract NUM_SUBMODUES // number of submodules }; @@ -133,6 +136,8 @@ extern int g_ms_submodule_log_levels[] __attribute__((visibility("default"))); class LogWriter { public: + using ExceptionHandler = std::function; + LogWriter(const LocationInfo &location, MsLogLevel log_level, SubModuleId submodule, ExceptionType excp_type = NoExceptionType) : location_(location), log_level_(log_level), 
submodule_(submodule), exception_type_(excp_type) {} @@ -141,6 +146,8 @@ class LogWriter { void operator<(const LogStream &stream) const noexcept __attribute__((visibility("default"))); void operator^(const LogStream &stream) const __attribute__((noreturn, visibility("default"))); + static void set_exception_handler(ExceptionHandler exception_handler) { exception_handler_ = exception_handler; } + private: void OutputLog(const std::ostringstream &msg) const; @@ -148,6 +155,8 @@ class LogWriter { MsLogLevel log_level_; SubModuleId submodule_; ExceptionType exception_type_; + + inline static ExceptionHandler exception_handler_ = nullptr; }; #define MSLOG_IF(level, condition, excp_type) \ diff --git a/mindspore/ccsrc/ir/param_value_py.h b/mindspore/ccsrc/utils/log_adapter_py.cc similarity index 54% rename from mindspore/ccsrc/ir/param_value_py.h rename to mindspore/ccsrc/utils/log_adapter_py.cc index a03e34ac6e1..c4793b960bf 100644 --- a/mindspore/ccsrc/ir/param_value_py.h +++ b/mindspore/ccsrc/utils/log_adapter_py.cc @@ -14,30 +14,33 @@ * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_IR_PARAM_VALUE_PY_H_ -#define MINDSPORE_CCSRC_IR_PARAM_VALUE_PY_H_ +#include "utils/log_adapter.h" -#include - -#include "ir/anf.h" +#include #include "pybind11/pybind11.h" -namespace mindspore { namespace py = pybind11; - -class ParamValuePy : public ParamValue { +namespace mindspore { +class PyExceptionInitializer { public: - ParamValuePy() : value_(py::none()) {} - explicit ParamValuePy(const py::object &value) : value_(value) {} - ~ParamValuePy() override = default; + PyExceptionInitializer() { mindspore::LogWriter::set_exception_handler(HandleExceptionPy); } - py::object value() { return value_; } - void set_value(const py::object &obj) { value_ = obj; } + ~PyExceptionInitializer() = default; private: - py::object value_; + static void HandleExceptionPy(ExceptionType exception_type, const std::string &str) { + if (exception_type == IndexError) { + throw py::index_error(str); + } + if (exception_type == ValueError) { + throw py::value_error(str); + } + if (exception_type == TypeError) { + throw py::type_error(str); + } + py::pybind11_fail(str); + } }; -using ParamValuePyPtr = std::shared_ptr; +static PyExceptionInitializer py_exception_initializer; } // namespace mindspore -#endif // MINDSPORE_CCSRC_IR_PARAM_VALUE_PY_H_ diff --git a/mindspore/ccsrc/utils/primitive_utils.cc b/mindspore/ccsrc/utils/primitive_utils.cc index 97fa954e128..490e2517a9a 100644 --- a/mindspore/ccsrc/utils/primitive_utils.cc +++ b/mindspore/ccsrc/utils/primitive_utils.cc @@ -15,7 +15,7 @@ */ #include "utils/primitive_utils.h" -#include "pipeline/parse/python_adapter.h" +#include "pipeline/jit/parse/python_adapter.h" #include "utils/log_adapter.h" #include "common/utils.h" diff --git a/mindspore/ccsrc/utils/symbolic.h b/mindspore/ccsrc/utils/symbolic.h index 1b7a212610a..ca68b2c8777 100644 --- a/mindspore/ccsrc/utils/symbolic.h +++ b/mindspore/ccsrc/utils/symbolic.h @@ -26,7 +26,7 @@ #include #include "ir/anf.h" -#include 
"pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" #include "utils/any.h" namespace mindspore { diff --git a/mindspore/ccsrc/utils/tensorprint_utils.cc b/mindspore/ccsrc/utils/tensorprint_utils.cc index ee53345f31a..08cd4e42912 100644 --- a/mindspore/ccsrc/utils/tensorprint_utils.cc +++ b/mindspore/ccsrc/utils/tensorprint_utils.cc @@ -21,7 +21,7 @@ #include #include #include "ir/tensor.h" -#include "device/convert_tensor_utils.h" +#include "runtime/device/convert_tensor_utils.h" #include "./securec.h" #ifndef NO_DLIB #include "tdt/tsd_client.h" @@ -256,6 +256,7 @@ bool SaveDataItem2File(const std::vector &items, const std::strin if (!print.SerializeToOstream(output)) { MS_LOG(ERROR) << "Save print file:" << print_file_path << " fail."; ret_end_thread = true; + break; } print.Clear(); } diff --git a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h index e28adb6e216..3e82aaff2d5 100644 --- a/mindspore/ccsrc/utils/utils.h +++ b/mindspore/ccsrc/utils/utils.h @@ -176,6 +176,10 @@ constexpr auto kApplyAdamWithAmsgradOpName = "ApplyAdamWithAmsgrad"; constexpr auto kTensorMoveOpName = "TensorMove"; constexpr auto kTensorScatterUpdateOpName = "TensorScatterUpdate"; constexpr auto kScatterNdUpdateOpName = "ScatterNdUpdate"; +constexpr auto kPushOpName = "Push"; +constexpr auto kPullOpName = "Pull"; +constexpr auto kEmbeddingLookupOpName = "EmbeddingLookup"; +constexpr auto kEmbeddingLookupProxyOpName = "EmbeddingLookupProxy"; // attr key name constexpr auto kAttrInputNames = "input_names"; @@ -236,9 +240,12 @@ constexpr auto kAttrOutputNum = "output_num"; constexpr auto kAttrSizeSplits = "size_splits"; constexpr auto kAttrOutputDefault = "output_default"; constexpr auto kAttrPrimitiveTarget = "primitive_target"; +constexpr auto kAttrUseLocking = "use_locking"; constexpr auto kAttrReduceScatterFlag = "reduce_scatter_flag"; constexpr auto kAttrOffset = "offset"; -constexpr auto kAttrUseLocking = "use_locking"; +constexpr auto 
kAttrPsKey = "ps_key"; +constexpr auto kAttrOptimizerType = "optim_type"; +constexpr auto kAttrChildGraph = "child_graph"; // attr value constexpr auto kValueTargetSwitch = "target_switch"; @@ -262,6 +269,7 @@ constexpr auto kAnfPartialFuncGraphIndex = 1; constexpr auto kRealInputNodeIndexInTupleGetItem = 1; constexpr auto kInputNodeOutputIndexInTupleGetItem = 2; constexpr auto kTupleGetItemInputSize = 3; +constexpr auto kSwitchInputSize = 4; // index define of control depend constexpr auto kControlDependPriorIndex = 1; constexpr auto kControlDependBehindIndex = 2; @@ -290,12 +298,24 @@ const std::set kOpFormatList = { kOpFormat_NC1HWC0_C04, kOpFormat_FRACTAL_Z_C04, kOpFormat_NDHWC}; const std::set kDefaultCompatibleFormat = {kOpFormat_ND, kOpFormat_NCHW, kOpFormat_NHWC, kOpFormat_HWCN}; const std::set kOptOperatorSet = { - kMomentumOpName, kApplyMomentumOpName, kApplyAdadeltaOpName, - kApplyAdagradOpName, kApplyAdagradDAName, kApplyAdamOpName, - kApplyAdaMaxOpName, kApplyAddSignOpName, kApplyCenteredRMSPOpName, - kApplyFtrlOpName, kApplyFtrlV2OpName, kApplyGradientDescentOpName, - kApplyPowerSignOpName, kApplyProximalAdagradOpName, kApplyProximalGradientDescentOpName, + kMomentumOpName, + kApplyMomentumOpName, + kApplyAdadeltaOpName, + kApplyAdagradOpName, + kApplyAdagradDAName, + kApplyAdamOpName, + kApplyAdaMaxOpName, + kApplyAddSignOpName, + kApplyCenteredRMSPOpName, + kApplyFtrlOpName, + kApplyFtrlV2OpName, + kApplyGradientDescentOpName, + kApplyPowerSignOpName, + kApplyProximalAdagradOpName, + kApplyProximalGradientDescentOpName, kApplyRMSPropOpName, + kPushOpName, + kPullOpName, }; const std::set kHWSpecialFormatSet = {kOpFormat_FRAC_Z, kOpFormat_NC1KHKWHWC0, kOpFormat_NC1HWC0, diff --git a/mindspore/ccsrc/vm/backend.cc b/mindspore/ccsrc/vm/backend.cc index 47bc69bbbbe..0290ee57fc2 100644 --- a/mindspore/ccsrc/vm/backend.cc +++ b/mindspore/ccsrc/vm/backend.cc @@ -23,7 +23,7 @@ #include "utils/callbacks.h" #include "utils/graph_utils.h" #include 
"utils/base_ref_extends.h" -#include "session/session_factory.h" +#include "backend/session/session_factory.h" #include "common/utils.h" #ifdef ENABLE_GE #include "utils/callbacks_ge.h" @@ -32,6 +32,7 @@ namespace mindspore { namespace compile { bool Backend::GetCond(const BaseRef &c, bool *const value) { return BaseRefToBool(c, value); } +bool Backend::GetIndex(const BaseRef &c, int *const value) { return BaseRefToInt(utils::cast(c), value); } LinConvertResult MsBackend::GetMultiGraphRun(const FuncGraphPtr &g) { // multi_graph merge to one, big graph have paramters in begin and only have one output diff --git a/mindspore/ccsrc/vm/backend.h b/mindspore/ccsrc/vm/backend.h index 3a93cf930f9..208c4010fb5 100644 --- a/mindspore/ccsrc/vm/backend.h +++ b/mindspore/ccsrc/vm/backend.h @@ -26,7 +26,7 @@ #include "ir/anf.h" #include "vm/segment_runner.h" #include "vm/vm.h" -#include "session/session_basic.h" +#include "backend/session/session_basic.h" namespace mindspore { namespace compile { @@ -46,6 +46,7 @@ class Backend { virtual void SimulateRun(FinalVMPtr, FuncGraphPtr) {} virtual SwitchCondStatus SetSimuCond(const BaseRef &, bool) { return kCondOk; } virtual bool GetCond(const BaseRef &c, bool *value); + virtual bool GetIndex(const BaseRef &c, int *value); virtual void SetSwitchGraph() {} virtual void SetSwitchActive(const BaseRef &, bool) {} virtual void RecallGraphInput(const FuncGraphPtr &, const VectorRef &, const BaseRef &) {} diff --git a/mindspore/ccsrc/vm/segment_runner.cc b/mindspore/ccsrc/vm/segment_runner.cc index db275061343..540b77bcaf1 100644 --- a/mindspore/ccsrc/vm/segment_runner.cc +++ b/mindspore/ccsrc/vm/segment_runner.cc @@ -31,7 +31,7 @@ #include "utils/utils.h" #include "ir/manager.h" #include "ir/func_graph_cloner.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { const char kMsConvert[] = "ms"; diff --git a/mindspore/ccsrc/vm/transform.cc b/mindspore/ccsrc/vm/transform.cc index 80d2fc9df96..2cf6ead8130 
100644 --- a/mindspore/ccsrc/vm/transform.cc +++ b/mindspore/ccsrc/vm/transform.cc @@ -26,9 +26,9 @@ #include #include -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" #ifdef ENABLE_GE -#include "transform/convert.h" +#include "transform/graph_ir/convert.h" #endif #include "utils/graph_utils.h" #include "utils/context/ms_context.h" @@ -46,8 +46,9 @@ using TypedPrimitiveAbstractClosurePtr = std::shared_ptr nonlinear_ops = {prim::kPrimReturn, prim::kPrimPartial, prim::kPrimSwitch, prim::kPrimMakeTuple, prim::kPrimBpropCut}; const std::vector &GetMsNonlinearOps() { - static const std::vector ms_nonlinear_ops = {prim::kPrimReturn, prim::kPrimPartial, prim::kPrimSwitch, - prim::kPrimBpropCut}; + static const std::vector ms_nonlinear_ops = {prim::kPrimReturn, prim::kPrimPartial, + prim::kPrimSwitch, prim::kPrimMakeTuple, + prim::kPrimBpropCut, prim::kPrimSwitchLayer}; return ms_nonlinear_ops; } @@ -187,6 +188,29 @@ std::vector SplitSort(const FuncGraphPtr &graph, const std::string & std::reverse(result.begin(), result.end()); return result; } + +bool IsSubGraph(const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(node); + if (node->isa()) { + auto cnode = node->cast(); + auto &inputs = cnode->inputs(); + if (inputs.empty()) { + MS_LOG(EXCEPTION) << "Inputs of apply node is empty"; + } + + AnfNodePtr fn = inputs[0]; + if (!IsValueNode(fn)) { + return false; + } + auto node_prim = GetValueNode(fn); + if (node_prim->name() == prim::kPrimPartial->name()) { + return true; + } + } else if (IsValueNode(node)) { + return true; + } + return false; +} } // namespace CompileGraph::CompileGraph(const BackendPtr &backend, const std::vector &cut_list) @@ -214,7 +238,6 @@ bool CompileGraph::IsCut(const AnfNodePtr &node) { } AnfNodePtr fn = inputs[0]; - MS_EXCEPTION_IF_NULL(fn); if (IsValueNode(fn)) { auto fg = GetValueNode(fn); if (fg->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { @@ -235,6 +258,15 @@ bool CompileGraph::IsCut(const AnfNodePtr &node) { 
MS_EXCEPTION_IF_NULL(ms_context); ms_context->set_enable_pynative_hook(true); } + + if (backend_->name() == kMsConvert && prim->name() == prim::kPrimMakeTuple->name()) { + if (inputs.size() < 2) { + return false; + } + auto ret = IsSubGraph(inputs[1]); + return ret; + } + return true; } } @@ -466,6 +498,8 @@ int CompileGraph::InterpretNode(const FuncGraphPtr &graph, const CNodePtr &node) } else if (IsPrimitive(fn, prim::kPrimSwitch)) { AddSwitch(node); AddSinkSwitch(node); + } else if (IsPrimitive(fn, prim::kPrimSwitchLayer)) { + AddSwitchLayer(node); } else if (IsPrimitive(fn, prim::kPrimMakeTuple)) { AddMakeTuple(node); } else { @@ -622,6 +656,17 @@ void CompileGraph::AddSwitch(const CNodePtr &node) { AddInst(Instruction::kSwitch, args); } +void CompileGraph::AddSwitchLayer(const CNodePtr &node) { + auto inputs = node->inputs(); + if (inputs.size() != 3) { + MS_LOG(EXCEPTION) << "Switch layer must have index and branches."; + } + VectorRef args; + args.emplace_back(Ref(inputs[1])); + args.emplace_back(Ref(inputs[2])); + AddInst(Instruction::kSwitchLayer, args); +} + void CompileGraph::AddReturn(const CNodePtr &node) { VectorRef args; if (backend_->simu_flag()) { diff --git a/mindspore/ccsrc/vm/transform.h b/mindspore/ccsrc/vm/transform.h index a02478fc1ba..d08a24d188f 100644 --- a/mindspore/ccsrc/vm/transform.h +++ b/mindspore/ccsrc/vm/transform.h @@ -28,7 +28,7 @@ #include "vm/vm.h" #include "ir/anf.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "vm/segment_runner.h" #include "vm/backend.h" @@ -90,6 +90,7 @@ class CompileGraph { void AddPartial(const CNodePtr &node); void AddMakeTuple(const CNodePtr &node); void AddSwitch(const CNodePtr &node); + void AddSwitchLayer(const CNodePtr &node); void AddReturn(const CNodePtr &node); void AddPrimitive(const CNodePtr &node, const PrimitivePtr &prim); void AddInput(const AnfNodePtr &node); diff --git a/mindspore/ccsrc/vm/vm.cc b/mindspore/ccsrc/vm/vm.cc index c73d41df6c6..baa5b0ea118 100644 --- 
a/mindspore/ccsrc/vm/vm.cc +++ b/mindspore/ccsrc/vm/vm.cc @@ -23,7 +23,7 @@ #include "vm/vmimpl.h" #include "vm/backend.h" #include "vm/transform.h" -#include "pipeline/parse/data_converter.h" +#include "pipeline/jit/parse/data_converter.h" #include "utils/base_ref_extends.h" namespace mindspore { @@ -480,6 +480,36 @@ void FinalVM::InstSwitch(const VectorRef &args) { MS_LOG(DEBUG) << "End"; } +void FinalVM::InstSwitchLayer(const VectorRef &args) { + MS_LOG(DEBUG) << "Start"; + const size_t args_size = 2; + if (args.size() != args_size) { + MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " parameters, while the input size is " << args.size() + << "."; + return; + } + + int idx = utils::cast(args[0]); + VectorRef branches = utils::cast(Ref(utils::cast(args[1]))); + int size = static_cast(branches.size()); + + BaseRef index = Ref(idx); + int idx_value = 0; + if (!backend_->GetIndex(index, &idx_value)) { + MS_LOG(EXCEPTION) << "Not supported type to be casted to int."; + } + if (idx_value < 0) { + // Add support negative index range [-size, -1]. + idx_value += size; + } + if (idx_value < 0 || idx_value >= size) { + MS_LOG(EXCEPTION) << __FUNCTION__ << " given index " << idx_value << " out of range. 
Please make sure the value " + << "of index in [" << -size << ", " << size << "), and the type is int32."; + } + Push(branches[idx_value]); + MS_LOG(DEBUG) << "End"; +} + void FinalVM::InstTuple(const VectorRef &args) { MS_LOG(DEBUG) << "Start"; VectorRef tuple; @@ -618,57 +648,8 @@ void FinalVM::SyncData(const py::object &arg) { BaseRef FinalVM::RunHook(const PrimitivePtr &prim, const VectorRef &args) { MS_LOG(DEBUG) << "input for operation:"; - auto prim_py = dyn_cast(prim); - std::size_t args_size = args.size(); - auto py_args = py::tuple(args_size); - size_t i = 0; - for (auto &arg : args) { - py_args[i] = BaseRefToPyData(arg); - MS_LOG(DEBUG) << "arg: " << i << ":"; - i++; - } - // Hook operator for execute cell custom bprop function - py::object obj; - bool is_bprop = prim->HasAttr("bprop"); - if (is_bprop) { - SyncData(py_args); - py::function fn_bprop = prim_py->hook(); - obj = fn_bprop(*py_args); - return obj; - } - // Sync gradient data from device to host - SyncData(py_args[2]); - bool is_cell = prim->HasAttr("cell_hook"); - if (is_cell) { - // Hook operator for execute cell hook function - std::string cell_id = GetValue(prim->GetAttr("cell_id")); - if (_hook_grad.find(cell_id) != _hook_grad.end()) { - std::size_t hook_args_size = 3; - auto hook_args = py::tuple(hook_args_size); - hook_args[0] = cell_id; - hook_args[1] = py::make_tuple(_hook_grad[cell_id]); - hook_args[2] = py::make_tuple(py_args[2]); - py::function fn_hook = prim_py->hook(); - obj = fn_hook(*hook_args); - if (py::isinstance(obj)) { - obj = py_args[2]; - } - _hook_grad.erase(cell_id); - } else { - _hook_grad[cell_id] = py_args[2]; - obj = py_args[2]; - } - } else { - // Hook operator for execute variable hook function - py::function fn_hook = prim_py->hook(); - obj = fn_hook(py::make_tuple(py_args[2])); - if (py::isinstance(obj)) { - obj = py_args[2]; - } - } - obj = py::make_tuple(obj); - return obj; + MS_EXCEPTION_IF_NULL(prim); + return prim->RunHookFunction(args); } - } // namespace 
compile } // namespace mindspore diff --git a/mindspore/ccsrc/vm/vm.h b/mindspore/ccsrc/vm/vm.h index 6a078c9baf0..02a1ad4ddb1 100644 --- a/mindspore/ccsrc/vm/vm.h +++ b/mindspore/ccsrc/vm/vm.h @@ -51,15 +51,17 @@ enum Instruction { kPush, kPrim, kGraph, - kPadStack + kPadStack, + kSwitchLayer }; using InstType = std::pair; using InstSet = std::vector; using InstFunctionMap = std::map>; -const std::vector inst_str{"call", "tail_call", "return", "partial", "switch", "switch_return", "tuple", - "input", "external", "push", "primitive", "graph", "pad_stack"}; +const std::vector inst_str{"call", "tail_call", "return", "partial", "switch", + "switch_return", "tuple", "input", "external", "push", + "primitive", "graph", "pad_stack", "switch_layer"}; class StructPartial : public Base { public: // Initialize StructPartial. @@ -114,6 +116,7 @@ class FinalVM { void InstExternal(const VectorRef &args); void InstPushPrim(const VectorRef &args); void InstSwitchReturn(const VectorRef &args); + void InstSwitchLayer(const VectorRef &args); void set_insts(const InstSet &value) { insts_ = value; } BaseRef RunHook(const PrimitivePtr &prim, const VectorRef &arg); @@ -157,8 +160,7 @@ class FinalVM { {Instruction::kExternal, [this](const VectorRef &args) { InstExternal(args); }}, {Instruction::kPrim, [this](const VectorRef &args) { InstPushPrim(args); }}, {Instruction::kSwitchReturn, [this](const VectorRef &args) { InstSwitchReturn(args); }}, - }; - std::map _hook_grad; + {Instruction::kSwitchLayer, [this](const VectorRef &args) { InstSwitchLayer(args); }}}; }; using FinalVMPtr = std::shared_ptr; diff --git a/mindspore/ccsrc/vm/vmimpl.cc b/mindspore/ccsrc/vm/vmimpl.cc index 51b2c9b3d53..2aebf8ad0d9 100644 --- a/mindspore/ccsrc/vm/vmimpl.cc +++ b/mindspore/ccsrc/vm/vmimpl.cc @@ -27,10 +27,10 @@ #include #include "ir/tensor.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "ir/manager.h" #include "ir/func_graph_cloner.h" -#include "ir/primitive.h" +#include 
"ir/primitive_py.h" #include "utils/convert_utils.h" #include "utils/primitive_utils.h" #include "debug/draw.h" diff --git a/mindspore/common/parameter.py b/mindspore/common/parameter.py index 571cc9cb40d..1605ee4bc55 100644 --- a/mindspore/common/parameter.py +++ b/mindspore/common/parameter.py @@ -17,11 +17,11 @@ import numbers from copy import copy from mindspore import context +from .._c_expression import ParamValue from . import dtype as mstype from .initializer import initializer, Initializer from .tensor import Tensor, MetaTensor from .._checkparam import _check_str_by_regular -from ..parallel._utils import _set_clone_info, _CloneInfo from ..parallel._tensor import _get_slice_index __all__ = ['Parameter', 'ParameterTuple'] @@ -51,34 +51,33 @@ class Parameter: requires_grad (bool): True if the parameter requires gradient. Default: True. layerwise_parallel (bool): A kind of model parallel mode. When layerwise_parallel is true in paralle mode, broadcast and gradients communication would not be applied on parameters. Default: False. - sparse_grad (str): Set if the parameter's gradient is sparse. Default: empty. - has_indexed_slices (bool): Set if the parameter's gradient is indexed_slices. Default: false. 
""" - def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False, - sparse_grad="", has_indexed_slices_grad=False): + def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False): + self._value = ParamValue() self.set_parameter_data(default_input) self.name = name self.requires_grad = requires_grad self.layerwise_parallel = layerwise_parallel - self.sparse_grad = sparse_grad - self.has_indexed_slices_grad = has_indexed_slices_grad self._is_init = False self._sliced = False - self.clone_info = _CloneInfo() + self.is_param_ps = False if context.get_context("mode") == context.PYNATIVE_MODE: self.init_data() def __repr__(self): format_str = 'Parameter (name={name})' - return format_str.format(name=self._name) + return format_str.format(name=self._value.name) def __parameter__(self): """For parse check.""" + def set_param_ps(self): + self.is_param_ps = True + @property def name(self): """Get the name of the parameter.""" - return self._name + return self._value.name @name.setter def name(self, name_): @@ -100,7 +99,7 @@ class Parameter: format(name_, PARAMETER_NAME_PREFIX_MAX_LEN)) else: raise ValueError("The type of the name should be `str` or `None`.") - self._name = name_ + self._value.name = name_ @property def sliced(self): @@ -140,7 +139,9 @@ class Parameter: """ _check_str_by_regular(prefix) x = copy(self) - x.name = prefix + '.' + x.name + # pylint: disable=protected-access + x._value = self._value.clone() + x._value.name = prefix + '.' 
+ self._value.name x.is_init = False if init != 'same': shape = self.default_input.shape @@ -152,58 +153,42 @@ class Parameter: x.init_data() else: x.default_input = initializer(init, shape=shape, dtype=dtype) - - x.clone_info = copy(self.clone_info) - _set_clone_info(self.clone_info, x.clone_info) return x @property def layerwise_parallel(self): - return self._layerwise_parallel + return self._value.layerwise_parallel @layerwise_parallel.setter def layerwise_parallel(self, value=True): if not isinstance(value, bool): raise TypeError("`layerwise_parallel` parameter must be bool type") - self._layerwise_parallel = value + self._value.layerwise_parallel = value @property def requires_grad(self): """Return whether the parameter requires gradient.""" - return self._requires_grad + return self._value.requires_grad @requires_grad.setter def requires_grad(self, value=True): if not isinstance(value, bool): raise TypeError("`requires_grad` parameter must be bool type") - self._requires_grad = value - - @property - def sparse_grad(self): - """Return whether the parameter's gradient is sparse.""" - return self._sparse_grad - - @sparse_grad.setter - def sparse_grad(self, value=""): - if not isinstance(value, str): - raise TypeError("`sparse_grad` parameter must be str type") - self._sparse_grad = value - - @property - def has_indexed_slices_grad(self): - """Return whether the parameter's gradient is indexed_slices.""" - return self._has_indexed_slices_grad - - @has_indexed_slices_grad.setter - def has_indexed_slices_grad(self, value=False): - if not isinstance(value, bool): - raise TypeError("`has_indexed_slices_grad` parameter must be bool type") - self._has_indexed_slices_grad = value + self._value.requires_grad = value @property def data(self): return self.default_input + @property + def default_input(self): + return self._data + + @default_input.setter + def default_input(self, data): + self._data = data + self._value.data = data + def __add__(self, other): return 
self.default_input + other @@ -223,11 +208,12 @@ class Parameter: def set_parameter_data(self, data): """Set `default_input` of current `Parameter`.""" + self.init_mode = None if isinstance(data, bool): raise ValueError('Parameter data can not be `bool`') if isinstance(data, Tensor): # make a copy of Tensor to init the parameter - data = Tensor(data.asnumpy().copy()) + data = Tensor(data.asnumpy()) data.init_flag = False elif isinstance(data, Initializer): self.init_mode = data @@ -242,7 +228,6 @@ class Parameter: self.default_input = data - def init_data(self, layout=None, set_sliced=False): """ Init data of the parameter. @@ -256,7 +241,7 @@ class Parameter: set_sliced (bool): True if should set parameter sliced after init the data of initializer. Default: False. """ - if not isinstance(self.default_input, MetaTensor): + if self.init_mode is None: return if layout is not None: if not isinstance(layout, list): diff --git a/mindspore/common/tensor.py b/mindspore/common/tensor.py index 043ab4f6cfb..64a8eb46373 100644 --- a/mindspore/common/tensor.py +++ b/mindspore/common/tensor.py @@ -73,7 +73,6 @@ class Tensor(Tensor_): else: Tensor_.__init__(self, input_data, dtype) self._virtual_flag = False - self._init_flag = False def __repr__(self): return str(self.__str__()) @@ -182,6 +181,9 @@ class Tensor(Tensor_): def __imod__(self, other): return self.__mod__(other) + def __pow__(self, other): + return tensor_operator_registry.get('__pow__')(self, other) + def __floordiv__(self, other): return tensor_operator_registry.get('__floordiv__')(self, other) @@ -205,19 +207,6 @@ class Tensor(Tensor_): raise TypeError("virtual_flag must be bool.") self._virtual_flag = value - @property - def init_flag(self): - """whether the tensor is init.""" - return self._init_flag - - @init_flag.setter - def init_flag(self, value): - """Set the tensor is init_flag.""" - if not isinstance(value, bool): - raise TypeError("init_flag must be bool.") - self.set_init_flag(value) - self._init_flag 
= value - class IndexedSlices: def __init__(self, indices, values, dense_shape): diff --git a/mindspore/communication/_comm_helper.py b/mindspore/communication/_comm_helper.py index 508aa2e7a9a..5e1f7d06e72 100644 --- a/mindspore/communication/_comm_helper.py +++ b/mindspore/communication/_comm_helper.py @@ -14,7 +14,7 @@ # ============================================================================ """comm_helper""" - +import os from ._hccl_management import load_lib as hccl_load_lib _HCCL_AVAILABLE = False @@ -44,7 +44,7 @@ else: HCCL_WORLD_COMM_GROUP = "hccl_world_group" NCCL_WORLD_COMM_GROUP = "nccl_world_group" - +MS_ROLE = os.getenv("MS_ROLE") class Backend: """ @@ -152,6 +152,9 @@ def _get_rank_helper(group, backend): Integer. The local rank id of the calling process. """ rank_id = None + if MS_ROLE in ("MS_PSERVER", "MS_SCHED"): + rank_id = 0 + return rank_id if backend == Backend.HCCL: if group == HCCL_WORLD_COMM_GROUP: rank_id = hccl.get_rank_id() @@ -211,6 +214,9 @@ def _get_size_helper(group, backend): Integer. The rank size of specified group. """ size = None + if MS_ROLE in ("MS_PSERVER", "MS_SCHED"): + size = 1 + return size if backend == Backend.HCCL: if group == HCCL_WORLD_COMM_GROUP: size = hccl.get_rank_size() diff --git a/mindspore/communication/management.py b/mindspore/communication/management.py index 1cd60fe2e53..3fb4e7b9477 100755 --- a/mindspore/communication/management.py +++ b/mindspore/communication/management.py @@ -13,6 +13,7 @@ # limitations under the License. 
# ============================================================================ """Communication management API""" +import os from mindspore.parallel._auto_parallel_context import auto_parallel_context from ._comm_helper import Backend, _get_rank_helper, _get_size_helper, \ _get_world_rank_from_group_rank_helper, _get_group_rank_from_world_rank_helper, \ @@ -28,6 +29,7 @@ __all__ = ["init", "release", "get_rank", "get_local_rank", "get_group_size", DEFAULT_WORLD_COMM_GROUP = HCCL_WORLD_COMM_GROUP DEFAULT_BACKEND = Backend("hccl") +MS_ROLE = os.getenv("MS_ROLE") def _get_group(group): @@ -58,6 +60,8 @@ def init(backend_name="hccl"): TypeError: If backend name is not a string. RuntimeError: If backend is invalid or distributed init fails. """ + if MS_ROLE in ("MS_PSERVER", "MS_SCHED"): + return if not isinstance(backend_name, str): raise TypeError("Backend name must be a string, but got {}".format(type(backend_name))) diff --git a/mindspore/context.py b/mindspore/context.py index b5be6c32132..0de6084caf5 100644 --- a/mindspore/context.py +++ b/mindspore/context.py @@ -17,6 +17,7 @@ The context of mindspore, used to configure the current execution environment, including execution mode, execution backend and other feature switches. """ import os +import time import threading from collections import namedtuple from types import FunctionType @@ -55,12 +56,20 @@ def _make_directory(path): os.makedirs(path) real_path = path except PermissionError as e: - logger.error( - f"No write permission on the directory `{path}, error = {e}") + logger.error(f"No write permission on the directory `{path}, error = {e}") raise ValueError(f"No write permission on the directory `{path}`.") return real_path +def _get_print_file_name(file_name): + """Add timestamp suffix to file name. Rename the file name: file_name + "." + time(seconds).""" + time_second = str(int(time.time())) + file_name = file_name + "." 
+ time_second + if os.path.exists(file_name): + ValueError("This file {} already exists.".format(file_name)) + return file_name + + class _ThreadLocalInfo(threading.local): """ Thread local Info used for store thread local attributes. @@ -209,6 +218,8 @@ class _Context: success = self._context_handle.set_device_target(target) if not success: raise ValueError("Target device name is invalid!!!") + if self.enable_debug_runtime and self.device_target == "CPU": + self.set_backend_policy("vm") @property def device_id(self): @@ -355,14 +366,6 @@ class _Context: def check_bprop(self, check_bprop_flag): self._context_handle.set_check_bprop_flag(check_bprop_flag) - @property - def enable_sparse(self): - return self._context_handle.get_enable_sparse_flag() - - @enable_sparse.setter - def enable_sparse(self, enable_sparse_flag): - self._context_handle.set_enable_sparse_flag(enable_sparse_flag) - @property def max_device_memory(self): return self._context_handle.get_max_device_memory() @@ -381,9 +384,28 @@ class _Context: return None @print_file_path.setter - def print_file_path(self, file): - self._context_handle.set_print_file_path(file) + def print_file_path(self, file_path): + """Add timestamp suffix to file name. 
Sets print file path.""" + print_file_path = os.path.realpath(file_path) + if os.path.isdir(print_file_path): + raise IOError("Print_file_path should be file path, but got {}.".format(file_path)) + if os.path.exists(print_file_path): + _path, _file_name = os.path.split(print_file_path) + path = _make_directory(_path) + file_name = _get_print_file_name(_file_name) + full_file_name = os.path.join(path, file_name) + else: + full_file_name = print_file_path + self._context_handle.set_print_file_path(full_file_name) + + @property + def enable_sparse(self): + return self._context_handle.get_enable_sparse() + + @enable_sparse.setter + def enable_sparse(self, enable_sparse): + self._context_handle.set_enable_sparse(enable_sparse) def check_input_format(x): import re @@ -575,8 +597,9 @@ def set_context(**kwargs): max_device_memory (str): Sets the maximum memory available for device, currently only supported on GPU. The format is "xxGB". Default: "1024GB". print_file_path (str): The path of print data to save. If this parameter is set, print data is saved to - a file by default, and turn off printing to the screen. - enable_sparse (bool): Whether to enable sparse feature. Default: False. + a file by default, and turn off printing to the screen. If the file already exists, add a timestamp + suffix to the file. + enable_sparse (bool): Whether to enable sparsity feature. Default: False. Raises: ValueError: If input key is not an attribute in context. 
diff --git a/mindspore/core/abstract/CMakeLists.txt b/mindspore/core/abstract/CMakeLists.txt new file mode 100644 index 00000000000..fa331776b34 --- /dev/null +++ b/mindspore/core/abstract/CMakeLists.txt @@ -0,0 +1,3 @@ +file(GLOB_RECURSE _ABSTRACT_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") +set_property(SOURCE ${_ABSTRACT_ALL_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ABSTRACT) +add_library(_mindspore_abstract_obj OBJECT ${_ABSTRACT_ALL_SRC_FILES}) diff --git a/mindspore/ccsrc/pipeline/static_analysis/abstract_value.cc b/mindspore/core/abstract/abstract_value.cc similarity index 95% rename from mindspore/ccsrc/pipeline/static_analysis/abstract_value.cc rename to mindspore/core/abstract/abstract_value.cc index b59545e5ae6..7bef3829a61 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/abstract_value.cc +++ b/mindspore/core/abstract/abstract_value.cc @@ -16,13 +16,12 @@ * limitations under the License. */ -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" #include #include "utils/symbolic.h" -#include "pipeline/static_analysis/static_analysis.h" -#include "pipeline/static_analysis/utils.h" +#include "abstract/utils.h" namespace mindspore { namespace abstract { @@ -55,7 +54,6 @@ ValuePtr AbstractBase::BuildValue() const { AbstractBasePtr AbstractBase::Broaden() const { AbstractBasePtr clone = Clone(); clone->set_value(kAnyValue); - clone->set_sparse_grad(sparse_grad_); return clone; } @@ -68,8 +66,7 @@ std::string AbstractBase::ToString() const { MS_EXCEPTION_IF_NULL(type_); MS_EXCEPTION_IF_NULL(shape_); buffer << type_name() << "(" - << "Type: " << type_->ToString() << " Value: " << value << " Shape: " << shape_->ToString() - << " sparse_grad: " << sparse_grad_ << " has_indexed_slices_grad: " << has_indexed_slices_grad_ << ")"; + << "Type: " << type_->ToString() << " Value: " << value << " Shape: " << shape_->ToString() << ")"; return buffer.str(); } @@ -78,25 +75,16 @@ 
AbstractBasePtr AbstractScalar::Broaden() const { return AbstractBase::Broaden() AbstractBasePtr AbstractScalar::Join(const AbstractBasePtr &other) { MS_EXCEPTION_IF_NULL(other); if (*this == *other) { - auto ret = shared_from_base(); - ret->set_sparse_grad(sparse_grad()); - ret->set_has_indexed_slices_grad(has_indexed_slices_grad()); - return ret; + return shared_from_base(); } auto value_self = GetValueTrack(); MS_EXCEPTION_IF_NULL(value_self); ValuePtr res_value = ValueJoin(value_self, other->GetValueTrack()); TypePtr res_type = TypeJoin(GetTypeTrack(), other->GetTypeTrack()); if (res_value == value_self) { - auto ret = shared_from_base(); - ret->set_sparse_grad(sparse_grad()); - ret->set_has_indexed_slices_grad(has_indexed_slices_grad()); - return ret; + return shared_from_base(); } - auto ret = std::make_shared(res_value, res_type); - ret->set_sparse_grad(sparse_grad()); - ret->set_has_indexed_slices_grad(has_indexed_slices_grad()); - return ret; + return std::make_shared(res_value, res_type); } AbstractBasePtr AbstractType::Clone() const { @@ -452,16 +440,11 @@ AbstractBasePtr AbstractTensor::Join(const AbstractBasePtr &other) { MS_LOG(EXCEPTION) << "Join failed as type mismatch, this: " << ToString() << ", other: " << other->ToString(); } if (*this == *other) { - if (sparse_grad() == other->sparse_grad()) { - return shared_from_base(); - } + return shared_from_base(); } auto element = element_->Join(other_tensor->element_); auto shape = ShapeJoin(this->shape(), other_tensor->shape()); - auto ret = std::make_shared(element, shape); - ret->set_sparse_grad(sparse_grad()); - ret->set_has_indexed_slices_grad(has_indexed_slices_grad()); - return ret; + return std::make_shared(element, shape); } bool AbstractTensor::operator==(const AbstractTensor &other) const { @@ -501,8 +484,6 @@ AbstractBasePtr AbstractTensor::Clone() const { ShapePtr shp = shape(); clone->set_shape(shp->Clone()); clone->set_value(GetValueTrack()); - clone->set_sparse_grad(sparse_grad()); - 
clone->set_has_indexed_slices_grad(has_indexed_slices_grad()); return clone; } @@ -512,8 +493,6 @@ AbstractBasePtr AbstractTensor::Broaden() const { auto shp = shape(); broaden->set_shape(shp->Clone()); broaden->set_value(kAnyValue); - broaden->set_sparse_grad(sparse_grad()); - broaden->set_has_indexed_slices_grad(has_indexed_slices_grad()); return broaden; } @@ -524,8 +503,6 @@ AbstractBasePtr AbstractTensor::BroadenWithShape() const { shp->Broaden(); broaden->set_shape(shp); broaden->set_value(kAnyValue); - broaden->set_sparse_grad(sparse_grad()); - broaden->set_has_indexed_slices_grad(has_indexed_slices_grad()); return broaden; } @@ -538,8 +515,7 @@ std::string AbstractTensor::ToString() const { MS_EXCEPTION_IF_NULL(value_track); buffer << type_name() << "(" << "shape: " << shape_track->ToString() << ", element: " << element_->ToString() - << ", value_ptr: " << value_track << ", value: " << value_track->ToString() << " sparse_grad " << sparse_grad() - << " has_indexed_slices_grad " << has_indexed_slices_grad() << ")"; + << ", value_ptr: " << value_track << ", value: " << value_track->ToString() << ")"; return buffer.str(); } @@ -838,7 +814,8 @@ bool AbstractRef::operator==(const AbstractBase &other) const { AbstractBasePtr AbstractRef::Join(const AbstractBasePtr &other) { auto other_ref = other->cast(); if (other_ref == nullptr) { - MS_LOG(EXCEPTION) << "Join failed as type mismatch, this: " << ToString() << ", other: " << other->ToString(); + auto new_ref = ref_->Join(other); + return std::make_shared(ref_key_, new_ref, ref_origin_); } if (*this == *other) { return shared_from_base(); diff --git a/mindspore/ccsrc/pipeline/static_analysis/abstract_value.h b/mindspore/core/abstract/abstract_value.h similarity index 97% rename from mindspore/ccsrc/pipeline/static_analysis/abstract_value.h rename to mindspore/core/abstract/abstract_value.h index 3981a6eb231..d922f93e70b 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/abstract_value.h +++ 
b/mindspore/core/abstract/abstract_value.h @@ -16,8 +16,8 @@ * limitations under the License. */ -#ifndef PIPELINE_STATIC_ANALYSIS_ABSTRACT_VALUE_H_ -#define PIPELINE_STATIC_ANALYSIS_ABSTRACT_VALUE_H_ +#ifndef MINDSPORE_CCSRC_ABSTRACT_ABSTRACT_VALUE_H_ +#define MINDSPORE_CCSRC_ABSTRACT_ABSTRACT_VALUE_H_ #include #include @@ -27,11 +27,11 @@ #include "utils/log_adapter.h" #include "utils/hashing.h" -#include "ir/base.h" +#include "base/base.h" #include "ir/dtype.h" #include "ir/value.h" #include "ir/tensor.h" -#include "pipeline/static_analysis/dshape.h" +#include "abstract/dshape.h" namespace mindspore { namespace abstract { @@ -44,7 +44,7 @@ class AbstractBase : public Base { public: explicit AbstractBase(const ValuePtr &value = nullptr, const TypePtr &type = kAnyType, const BaseShapePtr &shape = kNoShape) - : value_(value), type_(type), shape_(shape), sparse_grad_(""), has_indexed_slices_grad_(false) {} + : value_(value), type_(type), shape_(shape) {} ~AbstractBase() override = default; MS_DECLARE_PARENT(AbstractBase, Base) @@ -53,17 +53,11 @@ class AbstractBase : public Base { virtual bool operator==(const AbstractBase &other) const; void set_value(const ValuePtr &value) { value_ = value; } - void set_sparse_grad(const std::string &sparse_grad) { sparse_grad_ = sparse_grad; } - void set_has_indexed_slices_grad(const bool &has_indexed_slices_grad) { - has_indexed_slices_grad_ = has_indexed_slices_grad; - } void set_type(const TypePtr &type) { type_ = type; } void set_shape(const BaseShapePtr &shape) { shape_ = shape; } void set_value_desc(const std::string &desc) { value_desc_ = desc; } const std::string &value_desc() const { return value_desc_; } ValuePtr GetValueTrack() const { return value_; } - const std::string &sparse_grad() const { return sparse_grad_; } - const bool &has_indexed_slices_grad() const { return has_indexed_slices_grad_; } TypePtr GetTypeTrack() const { return type_; } BaseShapePtr GetShapeTrack() const { return shape_; } @@ -91,8 +85,6 @@ 
class AbstractBase : public Base { TypePtr type_; BaseShapePtr shape_; std::string value_desc_; // store initial value description for error report - std::string sparse_grad_; - bool has_indexed_slices_grad_; }; class AbstractScalar : public AbstractBase { @@ -631,4 +623,4 @@ class AbstractIndexedSlices : public AbstractUndetermined { }; } // namespace abstract } // namespace mindspore -#endif // PIPELINE_STATIC_ANALYSIS_ABSTRACT_VALUE_H_ +#endif // MINDSPORE_CCSRC_ABSTRACT_ABSTRACT_VALUE_H_ diff --git a/mindspore/ccsrc/pipeline/static_analysis/analysis_context.cc b/mindspore/core/abstract/analysis_context.cc similarity index 99% rename from mindspore/ccsrc/pipeline/static_analysis/analysis_context.cc rename to mindspore/core/abstract/analysis_context.cc index 4a43b141687..1ae6125838e 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/analysis_context.cc +++ b/mindspore/core/abstract/analysis_context.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pipeline/static_analysis/analysis_context.h" +#include "abstract/analysis_context.h" #include diff --git a/mindspore/ccsrc/pipeline/static_analysis/analysis_context.h b/mindspore/core/abstract/analysis_context.h similarity index 93% rename from mindspore/ccsrc/pipeline/static_analysis/analysis_context.h rename to mindspore/core/abstract/analysis_context.h index c0b34037024..c0293d7e917 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/analysis_context.h +++ b/mindspore/core/abstract/analysis_context.h @@ -16,14 +16,14 @@ * limitations under the License. 
*/ -#ifndef PIPELINE_STATIC_ANALYSIS_ANALYSIS_CONTEXT_H_ -#define PIPELINE_STATIC_ANALYSIS_ANALYSIS_CONTEXT_H_ +#ifndef MINDSPORE_CCSRC_ABSTRACT_ANALYSIS_CONTEXT_H_ +#define MINDSPORE_CCSRC_ABSTRACT_ANALYSIS_CONTEXT_H_ #include #include #include -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" #include "ir/meta_func_graph.h" namespace mindspore { @@ -85,4 +85,4 @@ struct ContextEqual { extern const AnalysisContextPtr kDummyAnalysisContext; } // namespace abstract } // namespace mindspore -#endif // PIPELINE_STATIC_ANALYSIS_ANALYSIS_CONTEXT_H_ +#endif // MINDSPORE_CCSRC_ABSTRACT_ANALYSIS_CONTEXT_H_ diff --git a/mindspore/ccsrc/pipeline/static_analysis/dshape.cc b/mindspore/core/abstract/dshape.cc similarity index 98% rename from mindspore/ccsrc/pipeline/static_analysis/dshape.cc rename to mindspore/core/abstract/dshape.cc index 183ec772fff..74ea1ff7bfe 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/dshape.cc +++ b/mindspore/core/abstract/dshape.cc @@ -16,7 +16,7 @@ * limitations under the License. */ -#include "pipeline/static_analysis/dshape.h" +#include "abstract/dshape.h" #include #include diff --git a/mindspore/ccsrc/pipeline/static_analysis/dshape.h b/mindspore/core/abstract/dshape.h similarity index 96% rename from mindspore/ccsrc/pipeline/static_analysis/dshape.h rename to mindspore/core/abstract/dshape.h index 3e850e309b4..b9b8e93292f 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/dshape.h +++ b/mindspore/core/abstract/dshape.h @@ -16,8 +16,8 @@ * limitations under the License. 
*/ -#ifndef PIPELINE_STATIC_ANALYSIS_DSHAPE_H_ -#define PIPELINE_STATIC_ANALYSIS_DSHAPE_H_ +#ifndef MINDSPORE_CCSRC_ABSTRACT_DSHAPE_H_ +#define MINDSPORE_CCSRC_ABSTRACT_DSHAPE_H_ #include #include @@ -27,7 +27,7 @@ #include #include "utils/log_adapter.h" -#include "ir/base.h" +#include "base/base.h" namespace mindspore { namespace abstract { @@ -132,4 +132,4 @@ using ListShapePtr = std::shared_ptr; } // namespace abstract } // namespace mindspore -#endif // PIPELINE_STATIC_ANALYSIS_DSHAPE_H_ +#endif // MINDSPORE_CCSRC_ABSTRACT_DSHAPE_H_ diff --git a/mindspore/ccsrc/pipeline/static_analysis/param_validator.cc b/mindspore/core/abstract/param_validator.cc similarity index 98% rename from mindspore/ccsrc/pipeline/static_analysis/param_validator.cc rename to mindspore/core/abstract/param_validator.cc index 2cbd33c1626..69fe88b4a3d 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/param_validator.cc +++ b/mindspore/core/abstract/param_validator.cc @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "pipeline/static_analysis/param_validator.h" +#include "abstract/param_validator.h" #include #include #include #include "utils/symbolic.h" -#include "pipeline/static_analysis/utils.h" +#include "abstract/utils.h" namespace mindspore { namespace abstract { diff --git a/mindspore/ccsrc/pipeline/static_analysis/param_validator.h b/mindspore/core/abstract/param_validator.h similarity index 93% rename from mindspore/ccsrc/pipeline/static_analysis/param_validator.h rename to mindspore/core/abstract/param_validator.h index daa436d66df..434235abda3 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/param_validator.h +++ b/mindspore/core/abstract/param_validator.h @@ -14,15 +14,15 @@ * limitations under the License. 
*/ -#ifndef PIPELINE_STATIC_ANALYSIS_PARAM_VALIDATOR_H_ -#define PIPELINE_STATIC_ANALYSIS_PARAM_VALIDATOR_H_ +#ifndef MINDSPORE_CCSRC_ABSTRACT_PARAM_VALIDATOR_H_ +#define MINDSPORE_CCSRC_ABSTRACT_PARAM_VALIDATOR_H_ #include #include #include #include -#include "pipeline/static_analysis/abstract_value.h" -#include "pipeline/static_analysis/utils.h" +#include "abstract/abstract_value.h" +#include "abstract/utils.h" #include "utils/any.h" #include "ir/primitive.h" @@ -97,4 +97,4 @@ void CheckArgsSpec(const AbstractBasePtrList &args_list) { } // namespace abstract } // namespace mindspore -#endif // PIPELINE_STATIC_ANALYSIS_PARAM_VALIDATOR_H_ +#endif // MINDSPORE_CCSRC_ABSTRACT_PARAM_VALIDATOR_H_ diff --git a/mindspore/ccsrc/pipeline/static_analysis/utils.cc b/mindspore/core/abstract/utils.cc similarity index 98% rename from mindspore/ccsrc/pipeline/static_analysis/utils.cc rename to mindspore/core/abstract/utils.cc index 4c399f6ffc1..16497c74a9b 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/utils.cc +++ b/mindspore/core/abstract/utils.cc @@ -16,13 +16,13 @@ * limitations under the License. */ -#include "pipeline/static_analysis/utils.h" +#include "abstract/utils.h" #include #include #include #include "utils/symbolic.h" -#include "pipeline/static_analysis/param_validator.h" +#include "abstract/param_validator.h" namespace mindspore { namespace abstract { diff --git a/mindspore/ccsrc/pipeline/static_analysis/utils.h b/mindspore/core/abstract/utils.h similarity index 90% rename from mindspore/ccsrc/pipeline/static_analysis/utils.h rename to mindspore/core/abstract/utils.h index 6a709ea99cd..be38ae860da 100644 --- a/mindspore/ccsrc/pipeline/static_analysis/utils.h +++ b/mindspore/core/abstract/utils.h @@ -16,18 +16,17 @@ * limitations under the License. 
*/ -#ifndef PIPELINE_STATIC_ANALYSIS_UTILS_H_ -#define PIPELINE_STATIC_ANALYSIS_UTILS_H_ +#ifndef MINDSPORE_CCSRC_ABSTRACT_UTILS_H_ +#define MINDSPORE_CCSRC_ABSTRACT_UTILS_H_ #include #include #include #include -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" #include "utils/any.h" #include "utils/misc.h" #include "utils/convert_utils.h" -#include "ir/primitive.h" namespace mindspore { namespace abstract { @@ -54,4 +53,4 @@ int GetPositiveAxis(int axis_value, size_t increment); ShapePtr GetBroadcastShape(const std::string &op, const AbstractTensorPtr &tensor_x, const AbstractTensorPtr &tensor_y); } // namespace abstract } // namespace mindspore -#endif // PIPELINE_STATIC_ANALYSIS_UTILS_H_ +#endif // MINDSPORE_CCSRC_ABSTRACT_UTILS_H_ diff --git a/mindspore/core/base/CMakeLists.txt b/mindspore/core/base/CMakeLists.txt new file mode 100644 index 00000000000..d65b91a8249 --- /dev/null +++ b/mindspore/core/base/CMakeLists.txt @@ -0,0 +1,3 @@ +file(GLOB_RECURSE _BASE_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") +set_property(SOURCE ${_BASE_ALL_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_BASE) +add_library(_mindspore_base_obj OBJECT ${_BASE_ALL_SRC_FILES}) diff --git a/mindspore/ccsrc/ir/base.cc b/mindspore/core/base/base.cc similarity index 98% rename from mindspore/ccsrc/ir/base.cc rename to mindspore/core/base/base.cc index 7a03269ad8f..07ed252e962 100644 --- a/mindspore/ccsrc/ir/base.cc +++ b/mindspore/core/base/base.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ir/base.h" +#include "base/base.h" #include #include #include diff --git a/mindspore/ccsrc/ir/base.h b/mindspore/core/base/base.h similarity index 97% rename from mindspore/ccsrc/ir/base.h rename to mindspore/core/base/base.h index 7dc4145837b..8e1a447c0df 100644 --- a/mindspore/ccsrc/ir/base.h +++ b/mindspore/core/base/base.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_IR_BASE_H_ -#define MINDSPORE_CCSRC_IR_BASE_H_ +#ifndef MINDSPORE_CCSRC_BASE_BASE_H_ +#define MINDSPORE_CCSRC_BASE_BASE_H_ #include #include @@ -149,4 +149,4 @@ struct MS_EXPORT TypeIdManager { }; } // namespace mindspore -#endif // MINDSPORE_CCSRC_IR_BASE_H_ +#endif // MINDSPORE_CCSRC_BASE_BASE_H_ diff --git a/mindspore/ccsrc/ir/CMakeLists.txt b/mindspore/core/ir/CMakeLists.txt similarity index 100% rename from mindspore/ccsrc/ir/CMakeLists.txt rename to mindspore/core/ir/CMakeLists.txt diff --git a/mindspore/ccsrc/ir/anf.cc b/mindspore/core/ir/anf.cc similarity index 99% rename from mindspore/ccsrc/ir/anf.cc rename to mindspore/core/ir/anf.cc index 4c1d2bf50dc..0d96ddf263c 100644 --- a/mindspore/ccsrc/ir/anf.cc +++ b/mindspore/core/ir/anf.cc @@ -24,9 +24,9 @@ #include #include "ir/func_graph.h" -#include "ir/primitive_base.h" +#include "ir/primitive.h" #include "utils/context/ms_context.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { // namespace to support intermediate representation definition diff --git a/mindspore/ccsrc/ir/anf.h b/mindspore/core/ir/anf.h similarity index 98% rename from mindspore/ccsrc/ir/anf.h rename to mindspore/core/ir/anf.h index 8a446278851..c1a28d57f18 100644 --- a/mindspore/ccsrc/ir/anf.h +++ b/mindspore/core/ir/anf.h @@ -26,9 +26,10 @@ #include #include -#include "ir/base.h" -#include "debug/info.h" +#include "base/base.h" +#include "ir/kernel_info_dev.h" #include "ir/scope.h" +#include "debug/info.h" // A MindSpore ANF IR defined here. 
// with BNF followed: @@ -71,19 +72,9 @@ class BaseRef; class Var; using VarPtr = std::shared_ptr; -namespace device { -class KernelInfo; -} // namespace device -using KernelInfoDevice = device::KernelInfo; -using KernelInfoDevicePtr = std::shared_ptr; - class AnfVisitor; -class ParamValue { - public: - ParamValue() = default; - virtual ~ParamValue() = default; -}; +class ParamValue; using ParamValuePtr = std::shared_ptr; // AnfNode is the basic class of the IR definition derived from Base. diff --git a/mindspore/ccsrc/ir/anf_extends.cc b/mindspore/core/ir/anf_extends.cc similarity index 97% rename from mindspore/ccsrc/ir/anf_extends.cc rename to mindspore/core/ir/anf_extends.cc index 432ffdb6060..b70a660aae6 100644 --- a/mindspore/ccsrc/ir/anf_extends.cc +++ b/mindspore/core/ir/anf_extends.cc @@ -22,9 +22,9 @@ #include #include "ir/visitor.h" -#include "pipeline/static_analysis/static_analysis.h" -#include "operator/ops.h" -#include "parallel/ops_info/ops_utils.h" +#include "ir/func_graph.h" +#include "frontend/operator/ops.h" +#include "frontend/parallel/ops_info/ops_utils.h" #include "debug/label.h" namespace mindspore { diff --git a/mindspore/core/ir/anf_py.cc b/mindspore/core/ir/anf_py.cc new file mode 100644 index 00000000000..d033dfff5ae --- /dev/null +++ b/mindspore/core/ir/anf_py.cc @@ -0,0 +1,28 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include "ir/anf.h" + +#include "pybind_api/api_register.h" + +namespace mindspore { +// Define python 'RefKey' class. +REGISTER_PYBIND_DEFINE(CNode, ([](const pybind11::module *m) { + (void)py::class_(*m, "CNode") + .def("expanded_str", (std::string(CNode::*)(int) const) & CNode::DebugString, + "Get CNode string representation with specified expansion level."); + })); +} // namespace mindspore diff --git a/mindspore/core/ir/device_sync.h b/mindspore/core/ir/device_sync.h new file mode 100644 index 00000000000..a6bbe92233a --- /dev/null +++ b/mindspore/core/ir/device_sync.h @@ -0,0 +1,38 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_IR_DEVICE_SYNC_H_ +#define MINDSPORE_CCSRC_IR_DEVICE_SYNC_H_ + +#include +#include +#include + +#include "ir/dtype/type.h" + +using std::string; + +namespace mindspore { +// Interface for data synchornize between device and host. 
+class DeviceSync { + public: + virtual bool SyncDeviceToHost(const std::vector &shape, size_t size, TypeId type, void *host_ptr) const = 0; + virtual bool SyncHostToDevice(const std::vector &shape, size_t size, TypeId type, + const void *host_ptr) const = 0; +}; +using DeviceSyncPtr = std::shared_ptr; +} // namespace mindspore +#endif // MINDSPORE_CCSRC_IR_DEVICE_SYNC_H_ diff --git a/mindspore/ccsrc/ir/dtype.cc b/mindspore/core/ir/dtype.cc similarity index 100% rename from mindspore/ccsrc/ir/dtype.cc rename to mindspore/core/ir/dtype.cc diff --git a/mindspore/ccsrc/ir/dtype.h b/mindspore/core/ir/dtype.h similarity index 99% rename from mindspore/ccsrc/ir/dtype.h rename to mindspore/core/ir/dtype.h index f10c56e6594..dc277c031c6 100644 --- a/mindspore/ccsrc/ir/dtype.h +++ b/mindspore/core/ir/dtype.h @@ -28,7 +28,7 @@ #include #include #include -#include "ir/base.h" +#include "base/base.h" #include "ir/named.h" #include "ir/dtype/type.h" diff --git a/mindspore/ccsrc/ir/dtype/container.cc b/mindspore/core/ir/dtype/container.cc similarity index 100% rename from mindspore/ccsrc/ir/dtype/container.cc rename to mindspore/core/ir/dtype/container.cc diff --git a/mindspore/ccsrc/ir/dtype/container.h b/mindspore/core/ir/dtype/container.h similarity index 99% rename from mindspore/ccsrc/ir/dtype/container.h rename to mindspore/core/ir/dtype/container.h index 0612d24c4dd..29579fe73cf 100644 --- a/mindspore/ccsrc/ir/dtype/container.h +++ b/mindspore/core/ir/dtype/container.h @@ -29,7 +29,7 @@ #include #include #include -#include "ir/base.h" +#include "base/base.h" #include "ir/named.h" #include "ir/dtype/type.h" diff --git a/mindspore/ccsrc/ir/dtype/empty.cc b/mindspore/core/ir/dtype/empty.cc similarity index 100% rename from mindspore/ccsrc/ir/dtype/empty.cc rename to mindspore/core/ir/dtype/empty.cc diff --git a/mindspore/ccsrc/ir/dtype/empty.h b/mindspore/core/ir/dtype/empty.h similarity index 99% rename from mindspore/ccsrc/ir/dtype/empty.h rename to 
mindspore/core/ir/dtype/empty.h index e3b46ec7d9a..e6149a1fce8 100644 --- a/mindspore/ccsrc/ir/dtype/empty.h +++ b/mindspore/core/ir/dtype/empty.h @@ -29,7 +29,7 @@ #include #include #include -#include "ir/base.h" +#include "base/base.h" #include "ir/named.h" #include "ir/dtype/type.h" diff --git a/mindspore/ccsrc/ir/dtype/number.cc b/mindspore/core/ir/dtype/number.cc similarity index 100% rename from mindspore/ccsrc/ir/dtype/number.cc rename to mindspore/core/ir/dtype/number.cc diff --git a/mindspore/ccsrc/ir/dtype/number.h b/mindspore/core/ir/dtype/number.h similarity index 99% rename from mindspore/ccsrc/ir/dtype/number.h rename to mindspore/core/ir/dtype/number.h index f8a746f8d68..8997ddc4dfe 100644 --- a/mindspore/ccsrc/ir/dtype/number.h +++ b/mindspore/core/ir/dtype/number.h @@ -29,7 +29,7 @@ #include #include #include -#include "ir/base.h" +#include "base/base.h" #include "ir/named.h" #include "ir/dtype/type.h" diff --git a/mindspore/ccsrc/ir/dtype/ref.cc b/mindspore/core/ir/dtype/ref.cc similarity index 100% rename from mindspore/ccsrc/ir/dtype/ref.cc rename to mindspore/core/ir/dtype/ref.cc diff --git a/mindspore/ccsrc/ir/dtype/ref.h b/mindspore/core/ir/dtype/ref.h similarity index 98% rename from mindspore/ccsrc/ir/dtype/ref.h rename to mindspore/core/ir/dtype/ref.h index 7d8159289f0..e798d72af54 100644 --- a/mindspore/ccsrc/ir/dtype/ref.h +++ b/mindspore/core/ir/dtype/ref.h @@ -29,7 +29,7 @@ #include #include #include -#include "ir/base.h" +#include "base/base.h" #include "ir/named.h" #include "ir/dtype/type.h" diff --git a/mindspore/ccsrc/ir/dtype/type.cc b/mindspore/core/ir/dtype/type.cc similarity index 100% rename from mindspore/ccsrc/ir/dtype/type.cc rename to mindspore/core/ir/dtype/type.cc diff --git a/mindspore/ccsrc/ir/dtype/type.h b/mindspore/core/ir/dtype/type.h similarity index 99% rename from mindspore/ccsrc/ir/dtype/type.h rename to mindspore/core/ir/dtype/type.h index cba0d17fce1..2e38e8ffb68 100644 --- a/mindspore/ccsrc/ir/dtype/type.h 
+++ b/mindspore/core/ir/dtype/type.h @@ -32,7 +32,7 @@ #include #include -#include "ir/base.h" +#include "base/base.h" #include "ir/named.h" #include "ir/dtype/type_id.h" diff --git a/mindspore/ccsrc/ir/dtype/type_extends.cc b/mindspore/core/ir/dtype/type_extends.cc similarity index 93% rename from mindspore/ccsrc/ir/dtype/type_extends.cc rename to mindspore/core/ir/dtype/type_extends.cc index a77a6a9cbae..771a460c174 100644 --- a/mindspore/ccsrc/ir/dtype/type_extends.cc +++ b/mindspore/core/ir/dtype/type_extends.cc @@ -15,7 +15,7 @@ */ #include "ir/dtype/type.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" namespace mindspore { abstract::AbstractBasePtr Type::ToAbstract() { diff --git a/mindspore/ccsrc/ir/dtype/type_id.h b/mindspore/core/ir/dtype/type_id.h similarity index 88% rename from mindspore/ccsrc/ir/dtype/type_id.h rename to mindspore/core/ir/dtype/type_id.h index a711779e919..6fb2a354c17 100644 --- a/mindspore/ccsrc/ir/dtype/type_id.h +++ b/mindspore/core/ir/dtype/type_id.h @@ -86,8 +86,8 @@ enum TypeId : int { // TypeId name map // const std::unordered_map type_name_map = { - {kNumberTypeBool, "Bool"}, {kNumberTypeInt8, "Int8"}, {kNumberTypeUInt8, "UInt8"}, - {kNumberTypeInt16, "Int16"}, {kNumberTypeInt32, "Int32"}, {kNumberTypeInt64, "Int64"}, - {kNumberTypeFloat16, "Float16"}, {kNumberTypeFloat32, "Float32"}, {kNumberTypeFloat64, "Float64"}}; + {kNumberTypeBool, "bool_"}, {kNumberTypeInt8, "int8"}, {kNumberTypeUInt8, "uint8"}, + {kNumberTypeInt16, "int16"}, {kNumberTypeInt32, "int32"}, {kNumberTypeInt64, "int64"}, + {kNumberTypeFloat16, "float16"}, {kNumberTypeFloat32, "float32"}, {kNumberTypeFloat64, "float64"}}; } // namespace mindspore #endif // MINDSPORE_CCSRC_IR_DTYPE_TYPE_ID_H_ diff --git a/mindspore/ccsrc/ir/dtype_extends.cc b/mindspore/core/ir/dtype_extends.cc similarity index 69% rename from mindspore/ccsrc/ir/dtype_extends.cc rename to mindspore/core/ir/dtype_extends.cc index 
732872cb4f5..099748217ed 100644 --- a/mindspore/ccsrc/ir/dtype_extends.cc +++ b/mindspore/core/ir/dtype_extends.cc @@ -19,9 +19,7 @@ #include #include #include "utils/log_adapter.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pybind_api/api_register.h" -#include "pybind_api/export_flags.h" +#include "abstract/abstract_value.h" namespace mindspore { TypePtr TypeAnything::DeepCopy() const { return kAnyType; } @@ -425,134 +423,6 @@ bool IsSubType(TypePtr const &t1, TypePtr const &t2) { } } -REGISTER_PYBIND_DEFINE( - typing, ([](py::module *const m) { - auto m_sub = m->def_submodule("typing", "submodule for dtype"); - py::enum_(m_sub, "TypeId"); - (void)m_sub.def("is_subclass", &IsIdentidityOrSubclass, "is equal or subclass"); - (void)m_sub.def("load_type", &TypeIdToType, "load type"); - (void)m_sub.def( - "dump_type", [](const TypePtr &t) { return t->type_id(); }, "dump type"); - (void)m_sub.def("str_to_type", &StringToType, "string to typeptr"); - (void)py::class_>(m_sub, "Type") - .def_readonly(PYTHON_DTYPE_FLAG, &mindspore::Type::parse_info_) - .def("__eq__", - [](const TypePtr &t1, const TypePtr &t2) { - if (t1 != nullptr && t2 != nullptr) { - return *t1 == *t2; - } - return false; - }) - .def("__hash__", &Type::hash) - .def("__str__", &Type::ToString) - .def("__repr__", &Type::ReprString) - .def("__deepcopy__", [](const TypePtr &t, py::dict) { - if (t == nullptr) { - return static_cast(nullptr); - } - return t->DeepCopy(); - }); - (void)py::class_>(m_sub, "Number").def(py::init()); - (void)py::class_>(m_sub, "Bool") - .def(py::init()) - .def(py::pickle( - [](const Bool &) { // __getstate__ - return py::make_tuple(); - }, - [](const py::tuple &) { // __setstate__ - return std::make_shared(); - })); - (void)py::class_>(m_sub, "Int") - .def(py::init()) - .def(py::init(), py::arg("nbits")) - .def(py::pickle( - [](const Int &t) { // __getstate__ - /* Return a tuple that fully encodes the state of the object */ - return 
py::make_tuple(py::int_(t.nbits())); - }, - [](const py::tuple &t) { // __setstate__ - if (t.size() != 1) { - throw std::runtime_error("Invalid state!"); - } - /* Create a new C++ instance */ - Int data(t[0].cast()); - return data; - })); - (void)py::class_>(m_sub, "UInt") - .def(py::init()) - .def(py::init(), py::arg("nbits")) - .def(py::pickle( - [](const UInt &t) { // __getstate__ - /* Return a tuple that fully encodes the state of the object */ - return py::make_tuple(py::int_(t.nbits())); - }, - [](const py::tuple &t) { // __setstate__ - if (t.size() != 1) { - throw std::runtime_error("Invalid state!"); - } - /* Create a new C++ instance */ - UInt data(t[0].cast()); - return data; - })); - (void)py::class_>(m_sub, "Float") - .def(py::init()) - .def(py::init(), py::arg("nbits")) - .def(py::pickle( - [](const Float &t) { // __getstate__ - /* Return a tuple that fully encodes the state of the object */ - return py::make_tuple(py::int_(t.nbits())); - }, - [](const py::tuple &t) { // __setstate__ - if (t.size() != 1) { - throw std::runtime_error("Invalid state!"); - } - /* Create a new C++ instance */ - Float data(t[0].cast()); - return data; - })); - (void)py::class_>(m_sub, "List") - .def(py::init()) - .def(py::init>(), py::arg("elements")); - (void)py::class_>(m_sub, "Tuple") - .def(py::init()) - .def(py::init>(), py::arg("elements")); - (void)py::class_>(m_sub, "TensorType") - .def(py::init()) - .def(py::init(), py::arg("element")) - .def("element_type", &TensorType::element) - .def(py::pickle( - [](const TensorType &t) { // __getstate__ - /* Return a tuple that fully encodes the state of the object */ - return py::make_tuple(py::int_(static_cast(t.element()->type_id()))); - }, - [](const py::tuple &t) { // __setstate__ - if (t.size() != 1) { - throw std::runtime_error("Invalid state!"); - } - /* Create a new C++ instance */ - TensorType data(TypeIdToType(TypeId(static_cast(t[0].cast())))); - return data; - })); - (void)py::class_>(m_sub, "IndexedSlicesType") - 
.def(py::init()); - (void)py::class_>(m_sub, "UndeterminedType") - .def(py::init()); - (void)py::class_>(m_sub, "Function") - .def(py::init()) - .def(py::init, TypePtr>(), py::arg("args"), py::arg("retval")); - (void)py::class_>(m_sub, "Class").def(py::init()); - (void)py::class_>(m_sub, "SymbolicKeyType").def(py::init()); - (void)py::class_>(m_sub, "EnvType").def(py::init()); - (void)py::class_>(m_sub, "TypeNone").def(py::init()); - (void)py::class_>(m_sub, "TypeType").def(py::init()); - (void)py::class_>(m_sub, "String").def(py::init()); - (void)py::class_>(m_sub, "RefKeyType").def(py::init()); - (void)py::class_>(m_sub, "RefType").def(py::init()); - (void)py::class_>(m_sub, "TypeAnything").def(py::init()); - (void)py::class_>(m_sub, "Slice").def(py::init()); - (void)py::class_>(m_sub, "TypeEllipsis").def(py::init()); - })); - const TypePtr kTypeExternal = std::make_shared(); const TypePtr kTypeEnv = std::make_shared(); const TypePtr kTypeType = std::make_shared(); diff --git a/mindspore/core/ir/dtype_py.cc b/mindspore/core/ir/dtype_py.cc new file mode 100644 index 00000000000..66bd8ba5f6f --- /dev/null +++ b/mindspore/core/ir/dtype_py.cc @@ -0,0 +1,155 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/dtype.h" +#include +#include +#include +#include "utils/log_adapter.h" +#include "abstract/abstract_value.h" +#include "pybind_api/api_register.h" +#include "pybind_api/export_flags.h" + +namespace mindspore { +// Define python wrapper to handle data types. +REGISTER_PYBIND_DEFINE( + typing, ([](py::module *const m) { + auto m_sub = m->def_submodule("typing", "submodule for dtype"); + py::enum_(m_sub, "TypeId"); + (void)m_sub.def("is_subclass", &IsIdentidityOrSubclass, "is equal or subclass"); + (void)m_sub.def("load_type", &TypeIdToType, "load type"); + (void)m_sub.def( + "dump_type", [](const TypePtr &t) { return t->type_id(); }, "dump type"); + (void)m_sub.def("str_to_type", &StringToType, "string to typeptr"); + (void)py::class_>(m_sub, "Type") + .def_readonly(PYTHON_DTYPE_FLAG, &mindspore::Type::parse_info_) + .def("__eq__", + [](const TypePtr &t1, const TypePtr &t2) { + if (t1 != nullptr && t2 != nullptr) { + return *t1 == *t2; + } + return false; + }) + .def("__hash__", &Type::hash) + .def("__str__", &Type::ToString) + .def("__repr__", &Type::ReprString) + .def("__deepcopy__", [](const TypePtr &t, py::dict) { + if (t == nullptr) { + return static_cast(nullptr); + } + return t->DeepCopy(); + }); + (void)py::class_>(m_sub, "Number").def(py::init()); + (void)py::class_>(m_sub, "Bool") + .def(py::init()) + .def(py::pickle( + [](const Bool &) { // __getstate__ + return py::make_tuple(); + }, + [](const py::tuple &) { // __setstate__ + return std::make_shared(); + })); + (void)py::class_>(m_sub, "Int") + .def(py::init()) + .def(py::init(), py::arg("nbits")) + .def(py::pickle( + [](const Int &t) { // __getstate__ + /* Return a tuple that fully encodes the state of the object */ + return py::make_tuple(py::int_(t.nbits())); + }, + [](const py::tuple &t) { // __setstate__ + if (t.size() != 1) { + throw std::runtime_error("Invalid state!"); + } + /* Create a new C++ instance */ + Int data(t[0].cast()); + return data; + })); + 
(void)py::class_>(m_sub, "UInt") + .def(py::init()) + .def(py::init(), py::arg("nbits")) + .def(py::pickle( + [](const UInt &t) { // __getstate__ + /* Return a tuple that fully encodes the state of the object */ + return py::make_tuple(py::int_(t.nbits())); + }, + [](const py::tuple &t) { // __setstate__ + if (t.size() != 1) { + throw std::runtime_error("Invalid state!"); + } + /* Create a new C++ instance */ + UInt data(t[0].cast()); + return data; + })); + (void)py::class_>(m_sub, "Float") + .def(py::init()) + .def(py::init(), py::arg("nbits")) + .def(py::pickle( + [](const Float &t) { // __getstate__ + /* Return a tuple that fully encodes the state of the object */ + return py::make_tuple(py::int_(t.nbits())); + }, + [](const py::tuple &t) { // __setstate__ + if (t.size() != 1) { + throw std::runtime_error("Invalid state!"); + } + /* Create a new C++ instance */ + Float data(t[0].cast()); + return data; + })); + (void)py::class_>(m_sub, "List") + .def(py::init()) + .def(py::init>(), py::arg("elements")); + (void)py::class_>(m_sub, "Tuple") + .def(py::init()) + .def(py::init>(), py::arg("elements")); + (void)py::class_>(m_sub, "TensorType") + .def(py::init()) + .def(py::init(), py::arg("element")) + .def("element_type", &TensorType::element) + .def(py::pickle( + [](const TensorType &t) { // __getstate__ + /* Return a tuple that fully encodes the state of the object */ + return py::make_tuple(py::int_(static_cast(t.element()->type_id()))); + }, + [](const py::tuple &t) { // __setstate__ + if (t.size() != 1) { + throw std::runtime_error("Invalid state!"); + } + /* Create a new C++ instance */ + TensorType data(TypeIdToType(TypeId(static_cast(t[0].cast())))); + return data; + })); + (void)py::class_>(m_sub, "IndexedSlicesType") + .def(py::init()); + (void)py::class_>(m_sub, "UndeterminedType") + .def(py::init()); + (void)py::class_>(m_sub, "Function") + .def(py::init()) + .def(py::init, TypePtr>(), py::arg("args"), py::arg("retval")); + (void)py::class_>(m_sub, 
"Class").def(py::init()); + (void)py::class_>(m_sub, "SymbolicKeyType").def(py::init()); + (void)py::class_>(m_sub, "EnvType").def(py::init()); + (void)py::class_>(m_sub, "TypeNone").def(py::init()); + (void)py::class_>(m_sub, "TypeType").def(py::init()); + (void)py::class_>(m_sub, "String").def(py::init()); + (void)py::class_>(m_sub, "RefKeyType").def(py::init()); + (void)py::class_>(m_sub, "RefType").def(py::init()); + (void)py::class_>(m_sub, "TypeAnything").def(py::init()); + (void)py::class_>(m_sub, "Slice").def(py::init()); + (void)py::class_>(m_sub, "TypeEllipsis").def(py::init()); + })); +} // namespace mindspore diff --git a/mindspore/ccsrc/ir/func_graph.cc b/mindspore/core/ir/func_graph.cc similarity index 99% rename from mindspore/ccsrc/ir/func_graph.cc rename to mindspore/core/ir/func_graph.cc index 4e01e9003f6..fabdd3e7d32 100644 --- a/mindspore/ccsrc/ir/func_graph.cc +++ b/mindspore/core/ir/func_graph.cc @@ -24,8 +24,7 @@ #include "debug/trace.h" #include "ir/manager.h" -#include "operator/ops.h" -#include "pybind_api/export_flags.h" +#include "frontend/operator/ops.h" #include "utils/ordered_set.h" #include "utils/convert_utils_base.h" @@ -45,7 +44,8 @@ FuncGraph::FuncGraph() hyper_param_count_(0), is_generated_(false), return_(nullptr), - manager_(std::weak_ptr()) { + manager_(std::weak_ptr()), + stub_(false) { debug_info_ = std::make_shared(); } diff --git a/mindspore/ccsrc/ir/func_graph.h b/mindspore/core/ir/func_graph.h similarity index 99% rename from mindspore/ccsrc/ir/func_graph.h rename to mindspore/core/ir/func_graph.h index b1be892a539..712c75b4315 100644 --- a/mindspore/ccsrc/ir/func_graph.h +++ b/mindspore/core/ir/func_graph.h @@ -149,7 +149,6 @@ class FuncGraph : public FuncGraphBase { // get the graph's abstract abstract::AbstractFunctionPtr abstract(); - abstract::AbstractBasePtr MakeAbstractClosure(const abstract::AnalysisContextPtr &context); // return the graph's output, or nullptr if not yet deduced AnfNodePtr output() const; @@ 
-344,6 +343,9 @@ class FuncGraph : public FuncGraphBase { void SetEffectDepends(const std::vector &depend_inputs); bool HasEffect(const CNodePtr &cnode); + bool stub() const { return stub_; } + void set_stub(bool stub) { stub_ = stub; } + private: // graph is manipulated by manager and others friend FuncGraphManager; @@ -402,6 +404,7 @@ class FuncGraph : public FuncGraphBase { // CNode order which relates to origin code order std::list order_; + bool stub_; }; inline CNodePtr NewCNode(const std::vector &inputs, const FuncGraphPtr &fg) { diff --git a/mindspore/ccsrc/ir/func_graph_cloner.cc b/mindspore/core/ir/func_graph_cloner.cc similarity index 97% rename from mindspore/ccsrc/ir/func_graph_cloner.cc rename to mindspore/core/ir/func_graph_cloner.cc index 4a0c69d99a1..0857770cad5 100644 --- a/mindspore/ccsrc/ir/func_graph_cloner.cc +++ b/mindspore/core/ir/func_graph_cloner.cc @@ -19,8 +19,8 @@ #include #include "ir/manager.h" -#include "ir/param_value_py.h" -#include "operator/ops.h" +#include "ir/param_value.h" +#include "frontend/operator/ops.h" #include "utils/convert_utils_base.h" #include "utils/log_adapter.h" #include "utils/profile.h" @@ -71,9 +71,8 @@ void Cloner::CloneParameter(const AnfNodePtr &node, const FuncGraphPtr &target, new_param->set_abstract(old_param->abstract()); new_param->set_name(old_param->name()); if (old_param->has_default()) { - auto param_value = std::dynamic_pointer_cast(old_param->default_param()); - auto param_value_new = std::make_shared(param_value->value()); - new_param->set_default_param(param_value_new); + // Default parameter can be shared since it is readonly. + new_param->set_default_param(old_param->default_param()); } ScopePtr scope = (node->scope() != kDefaultScope) ? 
node->scope() : this->scope(); new_param->set_scope(scope); @@ -219,6 +218,7 @@ void Cloner::SetFuncGraphInfo(const FuncGraphPtr &func_graph, FuncGraphPtr *cons (*target_func_graph)->set_kwonlyargs_count(func_graph->kwonlyargs_count()); (*target_func_graph)->set_hyper_param_count(func_graph->hyper_param_count()); (*target_func_graph)->set_is_generate(func_graph->is_generated()); + (*target_func_graph)->set_stub(func_graph->stub()); TraceManager::EndTrace(); } @@ -253,9 +253,8 @@ void Cloner::CloneParameter(const ParameterPtr ¶m, const AnfNodePtr &node) { if (node->isa()) { ParameterPtr old_param = dyn_cast(node); if (old_param->has_default()) { - auto param_value = std::dynamic_pointer_cast(old_param->default_param()); - auto param_value_new = std::make_shared(param_value->value()); - param->set_default_param(param_value_new); + // Default parameter can be shared since it is readonly. + param->set_default_param(old_param->default_param()); } param->set_name(old_param->name()); } @@ -631,6 +630,7 @@ FuncGraphPtr TransformableClone(const FuncGraphPtr &func_graph, const TraceInfoP new_func_graph->set_kwonlyargs_count(func_graph->kwonlyargs_count()); new_func_graph->set_hyper_param_count(func_graph->hyper_param_count()); new_func_graph->set_is_generate(func_graph->is_generated()); + new_func_graph->set_stub(func_graph->stub()); for (auto &item : func_graph->parameter_default_value()) { new_func_graph->set_param_default_value(item.first, cloner[item.second]); } diff --git a/mindspore/ccsrc/ir/func_graph_cloner.h b/mindspore/core/ir/func_graph_cloner.h similarity index 100% rename from mindspore/ccsrc/ir/func_graph_cloner.h rename to mindspore/core/ir/func_graph_cloner.h diff --git a/mindspore/ccsrc/ir/func_graph_extends.cc b/mindspore/core/ir/func_graph_extends.cc similarity index 96% rename from mindspore/ccsrc/ir/func_graph_extends.cc rename to mindspore/core/ir/func_graph_extends.cc index ad7aa6ee0cb..579409b05e2 100644 --- a/mindspore/ccsrc/ir/func_graph_extends.cc 
+++ b/mindspore/core/ir/func_graph_extends.cc @@ -22,12 +22,9 @@ #include "ir/manager.h" #include "ir/func_graph_cloner.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "utils/ordered_set.h" -#include "pipeline/static_analysis/abstract_value.h" -#include "pipeline/static_analysis/static_analysis.h" -#include "pipeline/static_analysis/abstract_function.h" - +#include "abstract/abstract_value.h" #include "debug/anf_ir_dump.h" #include "debug/trace.h" #include "debug/draw.h" @@ -60,14 +57,6 @@ AbstractFunctionPtr FuncGraph::abstract() { return std::make_shared(args_spec_list, output()->abstract()); } -abstract::AbstractBasePtr FuncGraph::MakeAbstractClosure(const abstract::AnalysisContextPtr &context) { - AnalysisContextPtr temp_context = context; - if (temp_context == nullptr) { - temp_context = abstract::AnalysisContext::DummyContext(); - } - return std::make_shared(shared_from_base(), temp_context); -} - void FuncGraph::set_output(const AnfNodePtr &value, bool force_new_ret) { if (force_new_ret || return_ == nullptr) { std::vector params({NewValueNode(prim::kPrimReturn), value}); diff --git a/mindspore/core/ir/func_graph_py.cc b/mindspore/core/ir/func_graph_py.cc new file mode 100644 index 00000000000..cff25b5aa1c --- /dev/null +++ b/mindspore/core/ir/func_graph_py.cc @@ -0,0 +1,35 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include "ir/meta_func_graph.h" +#include "ir/func_graph.h" + +#include "pybind_api/api_register.h" +#include "pybind_api/export_flags.h" + +namespace mindspore { +REGISTER_PYBIND_DEFINE(FuncGraph, ([](const pybind11::module *m) { + // Define python "MetaFuncGraph_" class + (void)py::class_>(*m, "MetaFuncGraph_") + .def_readonly(PYTHON_METAFUNCGRAPH_FLAG, &MetaFuncGraph::parse_info_) + .def(py::init()); + // Define python "FuncGraph" class + (void)py::class_(*m, "FuncGraph") + .def(py::init()) + .def("str", &FuncGraph::ToString, "Get FuncGraph string representation.") + .def("get_return", &FuncGraph::get_return, "Get return node of FuncGraph"); + })); +} // namespace mindspore diff --git a/mindspore/core/ir/kernel_info_dev.h b/mindspore/core/ir/kernel_info_dev.h new file mode 100644 index 00000000000..87c717bdcb3 --- /dev/null +++ b/mindspore/core/ir/kernel_info_dev.h @@ -0,0 +1,32 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_IR_KERNEL_INFO_DEV_H_ +#define MINDSPORE_CCSRC_IR_KERNEL_INFO_DEV_H_ + +#include + +namespace mindspore { +// Interface for device kernel program information. +class KernelInfoDevice { + public: + // If kernel program was built and build info is set. 
+ virtual bool has_build_info() const = 0; +}; +using KernelInfoDevicePtr = std::shared_ptr; +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_IR_KERNEL_INFO_DEV_H_ diff --git a/mindspore/ccsrc/ir/lite/param_value_lite.h b/mindspore/core/ir/lite/param_value_lite.h similarity index 97% rename from mindspore/ccsrc/ir/lite/param_value_lite.h rename to mindspore/core/ir/lite/param_value_lite.h index 2b249cfa4f4..1da9b915c22 100644 --- a/mindspore/ccsrc/ir/lite/param_value_lite.h +++ b/mindspore/core/ir/lite/param_value_lite.h @@ -19,7 +19,7 @@ #include -#include "ir/anf.h" +#include "ir/param_value.h" namespace mindspore { class ParamValueLite : public ParamValue { diff --git a/mindspore/ccsrc/ir/lite/tensor.cc b/mindspore/core/ir/lite/tensor.cc similarity index 100% rename from mindspore/ccsrc/ir/lite/tensor.cc rename to mindspore/core/ir/lite/tensor.cc diff --git a/mindspore/ccsrc/ir/lite/tensor.h b/mindspore/core/ir/lite/tensor.h similarity index 100% rename from mindspore/ccsrc/ir/lite/tensor.h rename to mindspore/core/ir/lite/tensor.h diff --git a/mindspore/ccsrc/ir/manager.cc b/mindspore/core/ir/manager.cc similarity index 99% rename from mindspore/ccsrc/ir/manager.cc rename to mindspore/core/ir/manager.cc index cf56500aeae..00c39679cd5 100644 --- a/mindspore/ccsrc/ir/manager.cc +++ b/mindspore/core/ir/manager.cc @@ -26,7 +26,7 @@ #include "ir/func_graph.h" #include "utils/profile.h" #include "utils/convert_utils_base.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { diff --git a/mindspore/ccsrc/ir/manager.h b/mindspore/core/ir/manager.h similarity index 100% rename from mindspore/ccsrc/ir/manager.h rename to mindspore/core/ir/manager.h diff --git a/mindspore/ccsrc/ir/meta_func_graph.cc b/mindspore/core/ir/meta_func_graph.cc similarity index 75% rename from mindspore/ccsrc/ir/meta_func_graph.cc rename to mindspore/core/ir/meta_func_graph.cc index 3b2704613a7..c0cf9d4d2f2 100644 --- a/mindspore/ccsrc/ir/meta_func_graph.cc 
+++ b/mindspore/core/ir/meta_func_graph.cc @@ -17,22 +17,9 @@ */ #include "ir/meta_func_graph.h" -#include "pipeline/static_analysis/static_analysis.h" -#include "pipeline/static_analysis/abstract_function.h" // namespace to support intermediate representation definition namespace mindspore { -abstract::AbstractBasePtr MetaFuncGraph::MakeAbstractClosure(const AnfNodePtr &anf_node) { - abstract::MetaFuncGraphAbstractClosurePtr meta_func_graph_fn; - if (anf_node == nullptr) { - meta_func_graph_fn = std::make_shared(shared_from_base()); - } else { - meta_func_graph_fn = - std::make_shared(shared_from_base(), anf_node->scope()); - } - return meta_func_graph_fn; -} - FuncGraphPtr MetaFuncGraph::GenerateFuncGraph(const abstract::AbstractBasePtrList &args_spec_list) { TypePtrList types; (void)std::transform(args_spec_list.begin(), args_spec_list.end(), std::back_inserter(types), diff --git a/mindspore/ccsrc/ir/meta_func_graph.h b/mindspore/core/ir/meta_func_graph.h similarity index 94% rename from mindspore/ccsrc/ir/meta_func_graph.h rename to mindspore/core/ir/meta_func_graph.h index f63f812f9ea..933c3f700d8 100644 --- a/mindspore/ccsrc/ir/meta_func_graph.h +++ b/mindspore/core/ir/meta_func_graph.h @@ -26,15 +26,11 @@ #include #include -#include "pybind11/pybind11.h" - #include "ir/dtype.h" #include "ir/anf.h" #include "ir/func_graph.h" #include "ir/signature.h" -#include "pipeline/static_analysis/abstract_value.h" - -namespace py = pybind11; +#include "abstract/abstract_value.h" namespace mindspore { // namespace to support intermediate representation definition @@ -48,7 +44,6 @@ class MetaFuncGraph : public FuncGraphBase { ~MetaFuncGraph() override = default; MS_DECLARE_PARENT(MetaFuncGraph, FuncGraphBase); - abstract::AbstractBasePtr MakeAbstractClosure(const AnfNodePtr &anf_node); // Return normalized versions of the arguments. // By default, this returns args unchanged. 
virtual abstract::AbstractBasePtrList NormalizeArgs(const abstract::AbstractBasePtrList &args_spec_list) const { diff --git a/mindspore/ccsrc/ir/meta_tensor.cc b/mindspore/core/ir/meta_tensor.cc similarity index 100% rename from mindspore/ccsrc/ir/meta_tensor.cc rename to mindspore/core/ir/meta_tensor.cc diff --git a/mindspore/ccsrc/ir/meta_tensor.h b/mindspore/core/ir/meta_tensor.h similarity index 99% rename from mindspore/ccsrc/ir/meta_tensor.h rename to mindspore/core/ir/meta_tensor.h index a8c07d6992c..00106215e8a 100644 --- a/mindspore/ccsrc/ir/meta_tensor.h +++ b/mindspore/core/ir/meta_tensor.h @@ -22,7 +22,7 @@ #include #include -#include "ir/base.h" +#include "base/base.h" #include "ir/dtype.h" #include "utils/convert_utils.h" #include "utils/hashing.h" diff --git a/mindspore/ccsrc/ir/meta_tensor_extends.cc b/mindspore/core/ir/meta_tensor_extends.cc similarity index 96% rename from mindspore/ccsrc/ir/meta_tensor_extends.cc rename to mindspore/core/ir/meta_tensor_extends.cc index 87f1db95e5c..d73aa193742 100644 --- a/mindspore/ccsrc/ir/meta_tensor_extends.cc +++ b/mindspore/core/ir/meta_tensor_extends.cc @@ -22,7 +22,7 @@ #include #include -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" namespace mindspore { namespace tensor { diff --git a/mindspore/ccsrc/ir/named.cc b/mindspore/core/ir/named.cc similarity index 96% rename from mindspore/ccsrc/ir/named.cc rename to mindspore/core/ir/named.cc index 9e1a7968b8d..802f0c8693a 100644 --- a/mindspore/ccsrc/ir/named.cc +++ b/mindspore/core/ir/named.cc @@ -15,7 +15,7 @@ */ #include "ir/named.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" namespace mindspore { bool Named::operator==(const Value &other) const { diff --git a/mindspore/ccsrc/ir/named.h b/mindspore/core/ir/named.h similarity index 100% rename from mindspore/ccsrc/ir/named.h rename to mindspore/core/ir/named.h diff --git a/mindspore/ccsrc/ir/optimizer_caller.h 
b/mindspore/core/ir/optimizer_caller.h similarity index 100% rename from mindspore/ccsrc/ir/optimizer_caller.h rename to mindspore/core/ir/optimizer_caller.h diff --git a/mindspore/core/ir/param_value.h b/mindspore/core/ir/param_value.h new file mode 100644 index 00000000000..00b79ae91ca --- /dev/null +++ b/mindspore/core/ir/param_value.h @@ -0,0 +1,95 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_IR_PARAM_VALUE_H_ +#define MINDSPORE_CCSRC_IR_PARAM_VALUE_H_ + +#include +#include +#include +#include +#include "ir/anf.h" +#include "ir/tensor.h" + +namespace mindspore { + +class ParamValue { + public: + ParamValue() {} + + ParamValue(const ParamValue &other) = default; + + ~ParamValue() = default; + + tensor::MetaTensorPtr value() const { return value_; } + void set_value(const tensor::MetaTensorPtr &value) { value_ = value; } + + const std::string &name() const { return name_; } + void set_name(const std::string &name) { name_ = name; } + + const std::string &sparse_grad() const { return sparse_grad_; } + void set_sparse_grad(const std::string &sparse_grad) { sparse_grad_ = sparse_grad; } + + bool requires_grad() const { return requires_grad_; } + void set_requires_grad(bool requires_grad) { requires_grad_ = requires_grad; } + + bool layerwise_parallel() const { return layerwise_parallel_; } + void set_layerwise_parallel(bool layerwise_parallel) { layerwise_parallel_ = 
layerwise_parallel; } + + bool has_indexed_slices_grad() const { return has_indexed_slices_grad_; } + void set_has_indexed_slices_grad(bool b) { has_indexed_slices_grad_ = b; } + + // Whether the parameter clone from other parameter. + bool cloned() const { return cloned_; } + + // Whether the parameter is cloned. + bool be_cloned() const { return be_cloned_; } + + // If the parameter is cloned, generate one index per clone. + const std::vector &be_cloned_index() const { return be_cloned_index_; } + + // If the parameter clone from other parameter, it has a unique index. + int32_t cloned_index() const { return cloned_index_; } + + // Make a cloned parameter and update clone info. + ParamValuePtr Clone() { + static std::atomic parameter_cloned_index{1}; + int32_t index = parameter_cloned_index.fetch_add(1, std::memory_order_relaxed); + auto clone = std::make_shared(*this); + clone->be_cloned_ = false; + clone->cloned_ = true; + clone->be_cloned_index_ = {}; + clone->cloned_index_ = index; + this->be_cloned_ = true; + this->be_cloned_index_.push_back(index); + return clone; + } + + private: + tensor::MetaTensorPtr value_; + std::string name_{"Parameter"}; + std::string sparse_grad_; + bool requires_grad_{true}; + bool layerwise_parallel_{false}; + bool has_indexed_slices_grad_{false}; + bool be_cloned_{false}; + bool cloned_{false}; + std::vector be_cloned_index_; + int32_t cloned_index_{0}; +}; + +} // namespace mindspore +#endif // MINDSPORE_CCSRC_IR_PARAM_VALUE_H_ diff --git a/mindspore/core/ir/param_value_py.cc b/mindspore/core/ir/param_value_py.cc new file mode 100644 index 00000000000..fb4b313c228 --- /dev/null +++ b/mindspore/core/ir/param_value_py.cc @@ -0,0 +1,55 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "ir/param_value.h" +#include "pybind11/pybind11.h" +#include "pybind_api/api_register.h" + +namespace mindspore { +namespace py = pybind11; + +REGISTER_PYBIND_DEFINE(ParamValue, ([](const py::module *m) { + (void)py::class_(*m, "ParamValue") + .def(py::init()) + .def("clone", &ParamValue::Clone) + .def_property("data", &ParamValue::value, &ParamValue::set_value) + .def_property("name", &ParamValue::name, &ParamValue::set_name) + .def_property("requires_grad", &ParamValue::requires_grad, &ParamValue::set_requires_grad) + .def_property("layerwise_parallel", &ParamValue::layerwise_parallel, + &ParamValue::set_layerwise_parallel) + .def_property("has_indexed_slices_grad", &ParamValue::has_indexed_slices_grad, + &ParamValue::set_has_indexed_slices_grad) + .def_property("sparse_grad", &ParamValue::sparse_grad, &ParamValue::set_sparse_grad) + .def(py::pickle( + [](const ParamValue &p) { // __getstate__ + return py::make_tuple(py::cast(p.value()), p.name(), p.requires_grad(), + p.layerwise_parallel(), p.has_indexed_slices_grad(), + p.sparse_grad()); + }, + [](const py::tuple &t) { // __setstate__ + if (t.size() != 6) { + std::runtime_error("Invalid state for ParamValue!"); + } + ParamValuePtr p = std::make_shared(); + p->set_value(t[0].cast()); + p->set_name(t[1].cast()); + p->set_requires_grad(t[2].cast()); + p->set_layerwise_parallel(t[3].cast()); + p->set_has_indexed_slices_grad(t[4].cast()); + p->set_sparse_grad(t[5].cast()); + return p; + })); + })); +} // namespace mindspore diff --git a/mindspore/ccsrc/ir/pattern_matcher.h 
b/mindspore/core/ir/pattern_matcher.h similarity index 99% rename from mindspore/ccsrc/ir/pattern_matcher.h rename to mindspore/core/ir/pattern_matcher.h index 6605b9ce4c8..94ba4a381a2 100644 --- a/mindspore/ccsrc/ir/pattern_matcher.h +++ b/mindspore/core/ir/pattern_matcher.h @@ -21,7 +21,7 @@ #include #include "ir/anf.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { diff --git a/mindspore/ccsrc/ir/primitive_base.cc b/mindspore/core/ir/primitive.cc similarity index 95% rename from mindspore/ccsrc/ir/primitive_base.cc rename to mindspore/core/ir/primitive.cc index 864427fe13e..352c0f31ae9 100644 --- a/mindspore/ccsrc/ir/primitive_base.cc +++ b/mindspore/core/ir/primitive.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ir/primitive_base.h" +#include "ir/primitive.h" #include diff --git a/mindspore/ccsrc/ir/primitive_base.h b/mindspore/core/ir/primitive.h similarity index 90% rename from mindspore/ccsrc/ir/primitive_base.h rename to mindspore/core/ir/primitive.h index b34c43d00e6..5471b580637 100644 --- a/mindspore/ccsrc/ir/primitive_base.h +++ b/mindspore/core/ir/primitive.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef MINDSPORE_CCSRC_IR_PRIMITIVE_BASE_H_ -#define MINDSPORE_CCSRC_IR_PRIMITIVE_BASE_H_ +#ifndef MINDSPORE_CCSRC_IR_PRIMITIVE_H_ +#define MINDSPORE_CCSRC_IR_PRIMITIVE_H_ #include #include @@ -24,9 +24,9 @@ #include #include "ir/dtype/type.h" -#include "pybind11/pybind11.h" - -namespace py = pybind11; +#include "abstract/abstract_value.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "utils/base_ref_extends.h" namespace mindspore { // Supported meta type @@ -114,6 +114,8 @@ class Primitive : public Named { void set_has_signature(bool has_signature) { has_signature_ = has_signature; } bool has_signature() const { return has_signature_; } bool is_base() const { return is_base_; } + virtual BaseRef RunHookFunction(const VectorRef &args) const { MS_LOG(EXCEPTION) << "call a empty function!"; } + virtual void CopyHookFunction(const PrimitivePtr &primitive) { MS_LOG(EXCEPTION) << "call a empty function!"; } protected: std::unordered_map attrs_; @@ -147,4 +149,4 @@ struct PrimitiveHasher { } }; } // namespace mindspore -#endif // MINDSPORE_CCSRC_IR_PRIMITIVE_BASE_H_ +#endif // MINDSPORE_CCSRC_IR_PRIMITIVE_H_ diff --git a/mindspore/ccsrc/ir/primitive.cc b/mindspore/core/ir/primitive_py.cc similarity index 60% rename from mindspore/ccsrc/ir/primitive.cc rename to mindspore/core/ir/primitive_py.cc index 6ec27c2567b..1a97487ddc5 100644 --- a/mindspore/ccsrc/ir/primitive.cc +++ b/mindspore/core/ir/primitive_py.cc @@ -14,33 +14,55 @@ * limitations under the License. 
*/ -#include "ir/primitive.h" +#include "ir/primitive_py.h" #include #include #include "ir/signature.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "./common.h" -#include "pipeline/parse/python_adapter.h" -#include "pipeline/parse/data_converter.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "pipeline/jit/parse/data_converter.h" #include "pybind11/pytypes.h" #include "utils/convert_utils_base.h" #include "utils/primitive_utils.h" - +#include "utils/base_ref_py.h" #include "pybind_api/api_register.h" #include "pybind_api/export_flags.h" namespace mindspore { +namespace { +constexpr auto kBpropAttrName = "bprop"; +constexpr auto kCellHookAttrName = "cell_hook"; +constexpr auto kCellIDAttrName = "cell_id"; +void SyncData(const py::object &arg) { + if (py::isinstance(arg)) { + py::tuple arg_list = py::cast(arg); + for (size_t i = 0; i < arg_list.size(); i++) { + SyncData(arg_list[i]); + } + } + if (py::isinstance(arg)) { + auto tensor = py::cast(arg); + (void)tensor->data_sync(); + } +} +} // namespace +std::map PrimitivePy::hook_grad_; +static ValuePtr PyArgToValue(const py::object &arg) { + if (py::isinstance(arg) && + py::cast(arg) == SignatureEnumKind::kKindEmptyDefaultValue) { + return nullptr; + } + return parse::data_converter::PyDataToValue(arg); +} + void PrimitivePy::set_signatures( std::vector> signatures) { signatures_.clear(); for (auto &signature : signatures) { - std::string name; - SignatureEnumRW rw; - SignatureEnumKind kind; - py::object default_value; - SignatureEnumDType dtype; - std::tie(name, rw, kind, default_value, dtype) = signature; - signatures_.emplace_back(Signature(name, rw, kind, default_value, dtype)); + auto [name, rw, kind, arg_default, dtype] = signature; + auto default_value = PyArgToValue(arg_default); + signatures_.emplace_back(name, rw, kind, default_value, dtype); } set_has_signature(true); } @@ -56,6 +78,51 @@ py::function PrimitivePy::GetBpropFunction() { } } +BaseRef 
PrimitivePy::RunHookFunction(const VectorRef &args) const { + auto py_args = py::tuple(args.size()); + size_t i = 0; + for (auto &arg : args) { + py_args[i] = BaseRefToPyData(arg); + MS_LOG(DEBUG) << "arg:" << i << ":"; + i++; + } + py::object obj; + bool is_bprop = this->HasAttr(kBpropAttrName); + if (is_bprop) { + SyncData(py_args); + obj = hook_(*py_args); + return std::make_shared(obj); + } + SyncData(py_args[2]); + bool is_cell = this->HasAttr(kCellHookAttrName); + if (is_cell) { + auto cell_id = GetValue(this->GetAttr(kCellIDAttrName)); + auto iter = hook_grad_.find(cell_id); + if (iter != hook_grad_.end()) { + auto hook_args = py::tuple(3); + hook_args[0] = cell_id; + hook_args[1] = py::make_tuple(iter->second); + hook_args[2] = py::make_tuple(py_args[2]); + obj = hook_(*hook_args); + if (py::isinstance(obj)) { + obj = py_args[2]; + } + hook_grad_.erase(cell_id); + } else { + hook_grad_[cell_id] = py_args[2]; + obj = py_args[2]; + } + } else { + // Hook operator for execute variable hook function + obj = hook_(py::make_tuple(py_args[2])); + if (py::isinstance(obj)) { + obj = py_args[2]; + } + } + obj = py::make_tuple(obj); + return std::make_shared(obj); +} + py::function PrimitivePy::GetComputeFunction() { static const char *const compute_func_name = "vm_impl"; @@ -99,6 +166,16 @@ py::dict PrimitivePy::GetAttrDict() { return attr_dict; } +void PrimitivePy::CopyHookFunction(const PrimitivePtr &primitive) { + MS_EXCEPTION_IF_NULL(primitive); + if (!primitive->isa()) { + MS_LOG(EXCEPTION) << "Cannot copy a primtive which is not python primitive hook function to python primitive!"; + } + auto primitive_py = primitive->cast(); + MS_EXCEPTION_IF_NULL(primitive_py); + this->set_hook(primitive_py->hook()); +} + REGISTER_PYBIND_DEFINE(Primitive_, ([](const py::module *m) { (void)py::enum_(*m, "prim_type", py::arithmetic()) .value("unknown", PrimType::kPrimTypeUnknown) diff --git a/mindspore/ccsrc/ir/primitive.h b/mindspore/core/ir/primitive_py.h similarity index 78% 
rename from mindspore/ccsrc/ir/primitive.h rename to mindspore/core/ir/primitive_py.h index 257302c0c49..2dc45ac341e 100644 --- a/mindspore/ccsrc/ir/primitive.h +++ b/mindspore/core/ir/primitive_py.h @@ -14,22 +14,25 @@ * limitations under the License. */ -#ifndef MINDSPORE_CCSRC_IR_PRIMITIVE_H_ -#define MINDSPORE_CCSRC_IR_PRIMITIVE_H_ +#ifndef MINDSPORE_CCSRC_IR_PRIMITIVE_PY_H_ +#define MINDSPORE_CCSRC_IR_PRIMITIVE_PY_H_ #include #include #include #include #include +#include -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" #include "utils/misc.h" +#include "pybind11/pybind11.h" #include "utils/log_adapter.h" -#include "ir/primitive_base.h" +#include "ir/primitive.h" #include "ir/signature.h" -#include "parallel/ops_info/operator_info.h" +#include "frontend/parallel/ops_info/operator_info.h" +namespace py = pybind11; namespace mindspore { class PrimitivePy : public Primitive { public: @@ -46,12 +49,14 @@ class PrimitivePy : public Primitive { const std::vector &signatures() const { return signatures_; } + void CopyHookFunction(const PrimitivePtr &primitive) override; + void AddPyAttr(const py::str &name, const py::object &obj); py::dict GetAttrDict(); void set_hook(const py::function &hook) { hook_ = hook; } py::function hook() const { return hook_; } - + BaseRef RunHookFunction(const VectorRef &args) const override; const bool parse_info_ = true; const py::object &GetPyObj() const { return python_obj_; } bool is_tuple_input_ = false; @@ -60,8 +65,9 @@ class PrimitivePy : public Primitive { py::object python_obj_; py::function hook_; std::vector signatures_; + static std::map hook_grad_; }; using PrimitivePyPtr = std::shared_ptr; } // namespace mindspore -#endif // MINDSPORE_CCSRC_IR_PRIMITIVE_H_ +#endif // MINDSPORE_CCSRC_IR_PRIMITIVE_PY_H_ diff --git a/mindspore/ccsrc/ir/scalar.h b/mindspore/core/ir/scalar.h similarity index 99% rename from mindspore/ccsrc/ir/scalar.h rename to mindspore/core/ir/scalar.h index 
e8e29fb2f91..adae8c65f9f 100644 --- a/mindspore/ccsrc/ir/scalar.h +++ b/mindspore/core/ir/scalar.h @@ -27,7 +27,7 @@ #include #include -#include "ir/base.h" +#include "base/base.h" #include "ir/dtype.h" #include "ir/dtype/number.h" diff --git a/mindspore/ccsrc/ir/scope.cc b/mindspore/core/ir/scope.cc similarity index 100% rename from mindspore/ccsrc/ir/scope.cc rename to mindspore/core/ir/scope.cc diff --git a/mindspore/ccsrc/ir/scope.h b/mindspore/core/ir/scope.h similarity index 100% rename from mindspore/ccsrc/ir/scope.h rename to mindspore/core/ir/scope.h diff --git a/mindspore/ccsrc/ir/signature.h b/mindspore/core/ir/signature.h similarity index 85% rename from mindspore/ccsrc/ir/signature.h rename to mindspore/core/ir/signature.h index 48be7e0f315..e9a5a2e1ca5 100644 --- a/mindspore/ccsrc/ir/signature.h +++ b/mindspore/core/ir/signature.h @@ -16,14 +16,11 @@ #ifndef MINDSPORE_CCSRC_IR_SIGNATURE_H_ #define MINDSPORE_CCSRC_IR_SIGNATURE_H_ + #include #include - -#include "pybind11/operators.h" #include "ir/value.h" -namespace py = pybind11; - namespace mindspore { // Input signature, support type enum SignatureEnumRW { @@ -62,8 +59,10 @@ struct Signature { ValuePtr default_value; // nullptr for no default value SignatureEnumDType dtype; Signature(const std::string &arg_name, const SignatureEnumRW &rw_tag, const SignatureEnumKind &arg_kind, - const py::object &arg_default, const SignatureEnumDType &arg_dtype); - Signature(const std::string &arg_name, const SignatureEnumRW &rw_tag, const SignatureEnumKind &arg_kind); + const ValuePtr &arg_default, const SignatureEnumDType &arg_dtype) + : name(arg_name), rw(rw_tag), kind(arg_kind), default_value(arg_default), dtype(arg_dtype) {} + Signature(const std::string &arg_name, const SignatureEnumRW &rw_tag, const SignatureEnumKind &arg_kind) + : Signature(arg_name, rw_tag, arg_kind, nullptr, SignatureEnumDType::kDTypeEmptyDefaultValue) {} }; } // namespace mindspore diff --git a/mindspore/ccsrc/ir/signature.cc 
b/mindspore/core/ir/signature_py.cc similarity index 76% rename from mindspore/ccsrc/ir/signature.cc rename to mindspore/core/ir/signature_py.cc index 8f312d5b981..f513df8533f 100644 --- a/mindspore/ccsrc/ir/signature.cc +++ b/mindspore/core/ir/signature_py.cc @@ -15,30 +15,14 @@ */ #include "ir/signature.h" - #include "pybind11/operators.h" #include "pybind_api/api_register.h" -#include "pipeline/parse/data_converter.h" +#include "pipeline/jit/parse/data_converter.h" + +namespace py = pybind11; namespace mindspore { -Signature::Signature(const std::string &arg_name, const SignatureEnumRW &rw_tag, const SignatureEnumKind &arg_kind, - const py::object &arg_default, const SignatureEnumDType &arg_dtype) - : name(arg_name), rw(rw_tag), kind(arg_kind), dtype(arg_dtype) { - if (py::isinstance(arg_default) && - py::cast(arg_default) == SignatureEnumKind::kKindEmptyDefaultValue) { - default_value = nullptr; - } else { - default_value = parse::data_converter::PyDataToValue(arg_default); - } -} - -Signature::Signature(const std::string &arg_name, const SignatureEnumRW &rw_tag, const SignatureEnumKind &arg_kind) - : name(arg_name), - rw(rw_tag), - kind(arg_kind), - default_value(nullptr), - dtype(SignatureEnumDType::kDTypeEmptyDefaultValue) {} - +// Bind SignatureEnumRW as a python class. 
REGISTER_PYBIND_DEFINE(SignatureEnumRW, ([](const py::module *m) { (void)py::enum_(*m, "signature_rw", py::arithmetic()) .value("RW_READ", SignatureEnumRW::kRWRead) diff --git a/mindspore/ccsrc/ir/tensor.cc b/mindspore/core/ir/tensor.cc similarity index 69% rename from mindspore/ccsrc/ir/tensor.cc rename to mindspore/core/ir/tensor.cc index c06ba2a8203..c04c2cca963 100644 --- a/mindspore/ccsrc/ir/tensor.cc +++ b/mindspore/core/ir/tensor.cc @@ -23,12 +23,22 @@ #include #include #include +#include +#include +#include +#include -#include "device/device_address.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "runtime/device/device_address.h" +#include "abstract/abstract_value.h" namespace mindspore { namespace tensor { +constexpr auto kEllipsis = "..."; +constexpr auto kThreshold = 6; + +constexpr auto kThreshold1DFloat = kThreshold * 2; +constexpr auto kThreshold1DInt = kThreshold * 4; +constexpr auto kThreshold1DBool = kThreshold * 2; static std::string MakeId() { // Use atomic to make id generator thread safe. @@ -115,6 +125,7 @@ template class TensorDataImpl : public TensorData { public: explicit TensorDataImpl(const std::vector &shape) : ndim_(shape.size()), data_size_(SizeOf(shape)) {} + ~TensorDataImpl() = default; TensorDataImpl(const std::vector &shape, void *data, size_t data_len) : ndim_(shape.size()), data_size_(SizeOf(shape)), data_(CopyData(shape, data, data_len)) {} @@ -144,8 +155,8 @@ class TensorDataImpl : public TensorData { // Prevent null pointer for empty shape. return empty_data.data(); } + // Lazy allocation. if (data_.empty()) { - // Lazy allocation. 
data_.resize(data_size_); } return data_.data(); @@ -159,24 +170,140 @@ class TensorDataImpl : public TensorData { return false; } - std::string ToString() const override { - std::ostringstream ss; - ss << '['; - for (auto value : data_) { - ss << value << ','; + std::string ToString(const TypeId type, const std::vector &shape) const override { + constexpr auto valid = + std::is_same::value || std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value || std::is_same::value; + static_assert(valid, "Type is invalid"); + if (data_size_ == 0) { + return ""; } - ss << ']'; + if (data_.empty()) { + return ""; + } + + std::ostringstream ss; + if (data_size_ == 1 && ndim_ == 0) { // Scalar + OutputDataString(ss, type, 0, 0, 1); + return ss.str(); + } + ssize_t cursor = 0; + SummaryStringRecursive(ss, type, shape, &cursor, 0); return ss.str(); } private: + void OutputDataString(std::ostringstream &ss, const TypeId type, ssize_t cursor, ssize_t start, ssize_t end) const { + bool isScalar = ndim_ == 0 && end - start == 1; + int linefeedThreshold; + constexpr auto isFloat = + std::is_same::value || std::is_same::value || std::is_same::value; + for (ssize_t i = start; i < end && (cursor + i) < static_cast(data_size_); i++) { + const auto value = data_[cursor + i]; + if constexpr (isFloat) { + if (isScalar) { + ss << value; + } else { + ss << std::setw(15) << std::setprecision(8) << std::setiosflags(std::ios::scientific | std::ios::right) + << value; + } + linefeedThreshold = kThreshold1DFloat; + } else if (type == kNumberTypeBool) { + if (isScalar) { + ss << (value == 0 ? "False" : "True"); + } else { + ss << std::setw(5) << std::setiosflags(std::ios::right) << (value == 0 ? 
"False" : "True"); + } + linefeedThreshold = kThreshold1DBool; + } else { + constexpr auto isSigned = std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value; + if constexpr (isSigned) { + if (!isScalar && static_cast(value) >= 0) { + ss << ' '; + } + } + if constexpr (std::is_same::value) { + ss << static_cast(value); + } else if constexpr (std::is_same::value) { + ss << static_cast(value); + } else { + ss << value; + } + linefeedThreshold = kThreshold1DInt; + } + if (!isScalar && i != end - 1) { + ss << ' '; + } + if (!isScalar && ndim_ == 1 && (i + 1) % linefeedThreshold == 0) { + // Add a line feed every {threshold of type} for 1D tensor. + ss << '\n' << ' '; + } + } + } + + void SummaryStringRecursive(std::ostringstream &ss, const TypeId type, const std::vector &shape, ssize_t *cursor, + ssize_t depth) const { + if (depth >= static_cast(ndim_)) { + return; + } + ss << '['; + if (depth == static_cast(ndim_) - 1) { // Bottom dimension + ssize_t num = shape[depth]; + if (num > kThreshold && ndim_ > 1) { + OutputDataString(ss, type, *cursor, 0, kThreshold / 2); + ss << ' ' << kEllipsis << ' '; + OutputDataString(ss, type, *cursor, num - kThreshold / 2, num); + } else { + OutputDataString(ss, type, *cursor, 0, num); + } + *cursor += num; + } else { // Middle dimension + ssize_t num = shape[depth]; + // Handle the first half. + for (ssize_t i = 0; i < std::min(static_cast(kThreshold / 2), num); i++) { + if (i > 0) { + ss << '\n'; + ss << std::setw(depth + 1) << ' '; // Add the indent. + } + SummaryStringRecursive(ss, type, shape, cursor, depth + 1); + } + // Handle the ignored part. + if (num > kThreshold) { + ss << '\n'; + ss << std::setw(depth + 1) << ' '; // Add the indent. + ss << kEllipsis; + // Ignored at this layer. + ssize_t ignored = shape[depth + 1]; + for (ssize_t i = depth + 2; i < static_cast(ndim_); i++) { + ignored *= shape[i]; + } + // Multiple with ignored layers number. 
+ ignored *= num - kThreshold; + + *cursor += ignored; + } + // Handle the second half. + if (num > kThreshold / 2) { + for (ssize_t i = num - kThreshold / 2; i < num; i++) { + ss << '\n'; + ss << std::setw(depth + 1) << ' '; // Add the indent. + SummaryStringRecursive(ss, type, shape, cursor, depth + 1); + } + } + } + ss << ']'; + } + size_t ndim_{0}; size_t data_size_{0}; std::vector data_; }; template -TensorDataPtr MakeTensorData(TypeId data_type, const std::vector &shape, Args... args) { +TensorDataPtr MakeTensorData(TypeId data_type, const std::vector &shape, const Args... args) { switch (data_type) { case kNumberTypeBool: case kNumberTypeUInt8: @@ -213,7 +340,7 @@ Tensor::Tensor(const Tensor &tensor) data_(tensor.data_), dirty_(tensor.dirty_), id_(tensor.id_), - device_address_(tensor.device_address_) {} + device_sync_(tensor.device_sync_) {} Tensor::Tensor(const Tensor &tensor, TypeId data_type) : MetaTensor(data_type, tensor.shape_), @@ -221,7 +348,7 @@ Tensor::Tensor(const Tensor &tensor, TypeId data_type) data_(MakeTensorData(data_type, tensor.shape_, tensor.data_->data(), tensor.data_type_)), dirty_(tensor.dirty_), id_(tensor.id_), - device_address_(tensor.device_address_) {} + device_sync_(tensor.device_sync_) {} Tensor::Tensor(TypeId data_type, const std::vector &shape, TensorDataPtr data) : MetaTensor(data_type, shape), data_(std::move(data)), id_(MakeId()) {} @@ -266,10 +393,10 @@ bool Tensor::ValueEqual(const Tensor &tensor) const { Tensor &Tensor::AssignValue(const Tensor &tensor) { if (this != &tensor) { MetaTensor::operator=(tensor); - dirty_ = tensor.is_dirty(); - device_address_ = tensor.device_address(); + dirty_ = tensor.dirty_; + device_sync_ = tensor.device_sync_; data_ = tensor.data_; - id_ = tensor.id(); + id_ = tensor.id_; } return *this; } @@ -297,7 +424,7 @@ std::string Tensor::ToString() const { buf << "Tensor shape:[" << shape() << "]" << this->Dtype()->ToString(); // only print small tensor if (DataSize() < small_tensor_size) { - 
buf << "val:" << data().ToString(); + buf << ", value:" << data().ToString(data_type_, shape()); } return buf.str(); } @@ -307,13 +434,13 @@ std::string Tensor::ToStringRepr() const { auto type_ptr = this->Dtype(); MS_EXCEPTION_IF_NULL(type_ptr); buf << "Tensor shape:[" << shape() << "]" << type_ptr->ToString(); - buf << "\nval:" << data().ToString(); + buf << "\nvalue:" << data().ToString(data_type_, shape()); return buf.str(); } void Tensor::data_sync() const { - if (device_address_ != nullptr) { - if (!device_address_->SyncDeviceToHost(shape(), static_cast(data().nbytes()), data_type(), data_c())) { + if (device_sync_ != nullptr) { + if (!device_sync_->SyncDeviceToHost(shape(), static_cast(data().nbytes()), data_type(), data_c())) { MS_LOG(EXCEPTION) << "SyncDeviceToHost when asnumpy."; } } diff --git a/mindspore/ccsrc/ir/tensor.h b/mindspore/core/ir/tensor.h similarity index 94% rename from mindspore/ccsrc/ir/tensor.h rename to mindspore/core/ir/tensor.h index 5be8a063c11..727fb0fdd8f 100644 --- a/mindspore/ccsrc/ir/tensor.h +++ b/mindspore/core/ir/tensor.h @@ -23,15 +23,13 @@ #include #include "Eigen/Core" -#include "device/device_address.h" +#include "ir/device_sync.h" #include "ir/meta_tensor.h" #include "include/ms_tensor.h" #include "utils/log_adapter.h" using float16 = Eigen::half; -using mindspore::device::DeviceAddress; -using DeviceAddressPtr = std::shared_ptr; // brief mindspore namespace. // // mindspore namespace is the top level namespace of MindSpore project. @@ -57,7 +55,7 @@ class TensorData { /// Is data equals. virtual bool equals(const TensorData &other) const = 0; /// To string. 
- virtual std::string ToString() const = 0; + virtual std::string ToString(const TypeId type, const std::vector &shape) const = 0; }; using TensorDataPtr = std::shared_ptr; @@ -180,7 +178,6 @@ class Tensor : public MetaTensor { // brief Get Tensor data pointer for c++ type // - // param writable true if writable, false if read only // return The pointer to the object void *data_c() { return data().data(); } @@ -217,14 +214,14 @@ class Tensor : public MetaTensor { std::string ToStringRepr() const; - bool is_init() { return init_flag_; } + bool is_init() const { return init_flag_; } void set_init_flag(bool flag) { init_flag_ = flag; } bool is_dirty() const { return dirty_; } void set_dirty(const bool dirty) { dirty_ = dirty; } - DeviceAddressPtr device_address() const { return device_address_; } - void set_device_address(const DeviceAddressPtr &device_address) { device_address_ = device_address; } + DeviceSyncPtr device_address() const { return device_sync_; } + void set_device_address(const DeviceSyncPtr &device_sync) { device_sync_ = device_sync; } std::string id() const { return id_; } @@ -235,7 +232,7 @@ class Tensor : public MetaTensor { TensorDataPtr data_{nullptr}; bool dirty_{true}; std::string id_{""}; - DeviceAddressPtr device_address_{nullptr}; + DeviceSyncPtr device_sync_{nullptr}; }; using TensorPtr = std::shared_ptr; using TensorPtrList = std::vector>; diff --git a/mindspore/ccsrc/ir/tensor_py.cc b/mindspore/core/ir/tensor_py.cc similarity index 98% rename from mindspore/ccsrc/ir/tensor_py.cc rename to mindspore/core/ir/tensor_py.cc index 11a000cef7d..ef78d2720e2 100644 --- a/mindspore/ccsrc/ir/tensor_py.cc +++ b/mindspore/core/ir/tensor_py.cc @@ -22,10 +22,9 @@ #include #include -#include "device/device_address.h" #include "pybind_api/api_register.h" #include "pybind_api/export_flags.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" namespace mindspore { namespace tensor { @@ -213,9 +212,28 @@ static 
std::vector GetShapeFromTuple(const py::tuple &tuple) { } REGISTER_PYBIND_DEFINE(Tensor, ([](const py::module *m) { + // Define python MetaTensor class. + (void)py::class_>(*m, "MetaTensor") + .def(py::init>(), py::arg("dtype"), py::arg("shape")) + .def_readonly(PYTHON_META_TENSOR_FLAG, &MetaTensor::parse_info_) + .def_property_readonly("dtype", &MetaTensor::Dtype, "Get the MetaTensor's dtype.") + .def_property_readonly("shape", &MetaTensor::shape, "Get the MetaTensor's shape.") + .def(py::pickle( + [](const MetaTensor &t) { // __getstate__ + /* Return a tuple that fully encodes the state of the object */ + return py::make_tuple(static_cast(t.data_type()), t.shape()); + }, + [](const py::tuple &t) { // __setstate__ + if (t.size() != 2) { + throw std::runtime_error("Invalid state!"); + } + /* Create a new C++ instance */ + MetaTensor tensor(TypeId(t[0].cast()), t[1].cast>()); + return tensor; + })); // Define python Tensor class. // dtype should define before Tensor, because Tensor init depend dtype - (void)py::class_>(*m, "Tensor") + (void)py::class_>(*m, "Tensor") .def(py::init([](const Tensor &tensor) { return std::make_shared(tensor); }), py::arg("input")) .def(py::init([](const Tensor &tensor, const TypePtr &type_ptr) { @@ -252,6 +270,7 @@ REGISTER_PYBIND_DEFINE(Tensor, ([](const py::module *m) { }), py::arg("input"), py::arg("dtype") = nullptr) .def_readonly(PYTHON_TENSOR_FLAG, &Tensor::parse_info_) + .def_property("init_flag", &Tensor::is_init, &Tensor::set_init_flag) .def_property_readonly("dtype", &Tensor::Dtype, R"mydelimiter( Get the tensor's data type. @@ -365,26 +384,6 @@ REGISTER_PYBIND_DEFINE(Tensor, ([](const py::module *m) { /* Create a new C++ instance */ return TensorPy::MakeTensor(t[0].cast()); })); - // Define python MetaTensor class. 
- (void)py::class_>(*m, "MetaTensor") - .def(py::init>(), py::arg("dtype"), py::arg("shape")) - .def_readonly(PYTHON_META_TENSOR_FLAG, &MetaTensor::parse_info_) - .def_property_readonly("dtype", &MetaTensor::Dtype, "Get the MetaTensor's dtype.") - .def_property_readonly("shape", &MetaTensor::shape, "Get the MetaTensor's shape.") - .def(py::pickle( - [](const MetaTensor &t) { // __getstate__ - /* Return a tuple that fully encodes the state of the object */ - return py::make_tuple(static_cast(t.data_type()), t.shape()); - }, - [](const py::tuple &t) { // __setstate__ - if (t.size() != 2) { - throw std::runtime_error("Invalid state!"); - } - /* Create a new C++ instance */ - MetaTensor tensor(TypeId(t[0].cast()), t[1].cast>()); - return tensor; - })); })); - } // namespace tensor } // namespace mindspore diff --git a/mindspore/ccsrc/ir/tensor_py.h b/mindspore/core/ir/tensor_py.h similarity index 96% rename from mindspore/ccsrc/ir/tensor_py.h rename to mindspore/core/ir/tensor_py.h index 18ee5470717..f917584977a 100644 --- a/mindspore/ccsrc/ir/tensor_py.h +++ b/mindspore/core/ir/tensor_py.h @@ -81,8 +81,6 @@ struct type_caster : public npy_scalar_caster { } // namespace detail } // namespace pybind11 -using mindspore::device::DeviceAddress; -using DeviceAddressPtr = std::shared_ptr; // brief mindspore namespace. // // mindspore namespace is the top level namespace of Mindsporeession project. 
diff --git a/mindspore/ccsrc/ir/value.cc b/mindspore/core/ir/value.cc similarity index 100% rename from mindspore/ccsrc/ir/value.cc rename to mindspore/core/ir/value.cc diff --git a/mindspore/ccsrc/ir/value.h b/mindspore/core/ir/value.h similarity index 99% rename from mindspore/ccsrc/ir/value.h rename to mindspore/core/ir/value.h index ea9bb47ffe1..535de81adf0 100644 --- a/mindspore/ccsrc/ir/value.h +++ b/mindspore/core/ir/value.h @@ -25,7 +25,7 @@ #include #include -#include "ir/base.h" +#include "base/base.h" #include "ir/anf.h" #include "ir/dtype.h" #include "ir/scalar.h" diff --git a/mindspore/ccsrc/ir/value_extends.cc b/mindspore/core/ir/value_extends.cc similarity index 91% rename from mindspore/ccsrc/ir/value_extends.cc rename to mindspore/core/ir/value_extends.cc index 8eb34d0eeb1..c75da806650 100644 --- a/mindspore/ccsrc/ir/value_extends.cc +++ b/mindspore/core/ir/value_extends.cc @@ -20,8 +20,7 @@ #include #include -#include "pybind_api/api_register.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" namespace mindspore { using ContextPtr = abstract::AnalysisContextPtr; @@ -83,9 +82,4 @@ abstract::AbstractBasePtr ValueDictionary::ToAbstract() { [](const std::pair &item) { return std::make_pair(item.first, item.second->ToAbstract()); }); return std::make_shared(kv); } - -REGISTER_PYBIND_DEFINE( - RefKey, ([](const py::module *m) { - (void)py::class_>(*m, "RefKey").def(py::init(), py::arg("tag")); - })); } // namespace mindspore diff --git a/mindspore/core/ir/value_py.cc b/mindspore/core/ir/value_py.cc new file mode 100644 index 00000000000..1d80c74c4d7 --- /dev/null +++ b/mindspore/core/ir/value_py.cc @@ -0,0 +1,29 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/value.h" +#include + +#include "pybind_api/api_register.h" +#include "abstract/abstract_value.h" + +namespace mindspore { +// Define python 'RefKey' class. +REGISTER_PYBIND_DEFINE( + RefKey, ([](const py::module *m) { + (void)py::class_>(*m, "RefKey").def(py::init(), py::arg("tag")); + })); +} // namespace mindspore diff --git a/mindspore/ccsrc/ir/visitor.cc b/mindspore/core/ir/visitor.cc similarity index 100% rename from mindspore/ccsrc/ir/visitor.cc rename to mindspore/core/ir/visitor.cc diff --git a/mindspore/ccsrc/ir/visitor.h b/mindspore/core/ir/visitor.h similarity index 100% rename from mindspore/ccsrc/ir/visitor.h rename to mindspore/core/ir/visitor.h diff --git a/mindspore/dataset/__init__.py b/mindspore/dataset/__init__.py index f0070b428de..b2d26b41eee 100644 --- a/mindspore/dataset/__init__.py +++ b/mindspore/dataset/__init__.py @@ -18,12 +18,13 @@ datasets in special format, including mindrecord, tfrecord, manifest. Users can also create samplers with this module to sample data. 
""" -from .core.configuration import config +from .core import config from .engine.datasets import TFRecordDataset, ImageFolderDatasetV2, MnistDataset, MindDataset, NumpySlicesDataset, \ GeneratorDataset, ManifestDataset, Cifar10Dataset, Cifar100Dataset, VOCDataset, CocoDataset, CelebADataset,\ TextFileDataset, CLUEDataset, Schema, Shuffle, zip, RandomDataset from .engine.samplers import DistributedSampler, PKSampler, RandomSampler, SequentialSampler, SubsetRandomSampler, \ WeightedRandomSampler, Sampler +from .engine.cache_client import DatasetCache from .engine.serializer_deserializer import serialize, deserialize, show from .engine.graphdata import GraphData diff --git a/mindspore/dataset/core/config.py b/mindspore/dataset/core/config.py new file mode 100644 index 00000000000..c863186d97b --- /dev/null +++ b/mindspore/dataset/core/config.py @@ -0,0 +1,195 @@ +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +The configuration manager. 
+""" +import random +import numpy +import mindspore._c_dataengine as cde + +__all__ = ['set_seed', 'get_seed', 'set_prefetch_size', 'get_prefetch_size', 'set_num_parallel_workers', + 'get_num_parallel_workers', 'set_monitor_sampling_interval', 'get_monitor_sampling_interval', 'load'] + +INT32_MAX = 2147483647 +UINT32_MAX = 4294967295 + +_config = cde.GlobalContext.config_manager() + + +def set_seed(seed): + """ + Set the seed to be used in any random generator. This is used to produce deterministic results. + + Note: + This set_seed function sets the seed in the python random library and numpy.random library + for deterministic python augmentations using randomness. This set_seed function should + be called with every iterator created to reset the random seed. In our pipeline this + does not guarantee deterministic results with num_parallel_workers > 1. + + Args: + seed(int): seed to be set. + + Raises: + ValueError: If seed is invalid (< 0 or > MAX_UINT_32). + + Examples: + >>> import mindspore.dataset as ds + >>> # sets the new seed value, now operators with a random seed will use new seed value. + >>> ds.config.set_seed(1000) + """ + if seed < 0 or seed > UINT32_MAX: + raise ValueError("Seed given is not within the required range.") + _config.set_seed(seed) + random.seed(seed) + # numpy.random isn't thread safe + numpy.random.seed(seed) + + +def get_seed(): + """ + Get the seed. + + Returns: + Int, seed. + """ + return _config.get_seed() + + +def set_prefetch_size(size): + """ + Set the number of rows to be prefetched. + + Args: + size (int): total number of rows to be prefetched. + + Raises: + ValueError: If prefetch_size is invalid (<= 0 or > MAX_INT_32). + + Examples: + >>> import mindspore.dataset as ds + >>> # sets the new prefetch value. 
+ >>> ds.config.set_prefetch_size(1000) + """ + if size <= 0 or size > INT32_MAX: + raise ValueError("Prefetch size given is not within the required range.") + _config.set_op_connector_size(size) + + +def get_prefetch_size(): + """ + Get the prefetch size in number of rows. + + Returns: + Size, total number of rows to be prefetched. + """ + return _config.get_op_connector_size() + + +def set_num_parallel_workers(num): + """ + Set the default number of parallel workers. + + Args: + num (int): number of parallel workers to be used as a default for each operation. + + Raises: + ValueError: If num_parallel_workers is invalid (<= 0 or > MAX_INT_32). + + Examples: + >>> import mindspore.dataset as ds + >>> # sets the new parallel_workers value, now parallel dataset operators will run with 8 workers. + >>> ds.config.set_num_parallel_workers(8) + """ + if num <= 0 or num > INT32_MAX: + raise ValueError("Num workers given is not within the required range.") + _config.set_num_parallel_workers(num) + + +def get_num_parallel_workers(): + """ + Get the default number of parallel workers. + + Returns: + Int, number of parallel workers to be used as a default for each operation + """ + return _config.get_num_parallel_workers() + + +def set_monitor_sampling_interval(interval): + """ + Set the default interval(ms) of monitor sampling. + + Args: + interval (int): interval(ms) to be used to performance monitor sampling. + + Raises: + ValueError: If interval is invalid (<= 0 or > MAX_INT_32). + + Examples: + >>> import mindspore.dataset as ds + >>> # sets the new interval value. + >>> ds.config.set_monitor_sampling_interval(100) + """ + if interval <= 0 or interval > INT32_MAX: + raise ValueError("Interval given is not within the required range.") + _config.set_monitor_sampling_interval(interval) + + +def get_monitor_sampling_interval(): + """ + Get the default interval of performance monitor sampling. + + Returns: + Interval: interval(ms) of performance monitor sampling. 
+ """ + return _config.get_monitor_sampling_interval() + + +def __str__(): + """ + String representation of the configurations. + + Returns: + Str, configurations. + """ + return str(_config) + + +def load(file): + """ + Load configuration from a file. + + Args: + file (str): path the config file to be loaded. + + Raises: + RuntimeError: If file is invalid and parsing fails. + + Examples: + >>> import mindspore.dataset as ds + >>> # sets the default value according to values in configuration file. + >>> ds.config.load("path/to/config/file") + >>> # example config file: + >>> # { + >>> # "logFilePath": "/tmp", + >>> # "rowsPerBuffer": 32, + >>> # "numParallelWorkers": 4, + >>> # "workerConnectorSize": 16, + >>> # "opConnectorSize": 16, + >>> # "seed": 5489, + >>> # "monitorSamplingInterval": 30 + >>> # } + """ + _config.load(file) diff --git a/mindspore/dataset/core/configuration.py b/mindspore/dataset/core/configuration.py deleted file mode 100644 index 5376c668c40..00000000000 --- a/mindspore/dataset/core/configuration.py +++ /dev/null @@ -1,195 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -""" -The configuration manager. 
-""" -import random -import numpy -import mindspore._c_dataengine as cde - -INT32_MAX = 2147483647 -UINT32_MAX = 4294967295 - - -class ConfigurationManager: - """The configuration manager""" - - def __init__(self): - self.config = cde.GlobalContext.config_manager() - - def set_seed(self, seed): - """ - Set the seed to be used in any random generator. This is used to produce deterministic results. - - Note: - This set_seed function sets the seed in the python random library and numpy.random library - for deterministic python augmentations using randomness. This set_seed function should - be called with every iterator created to reset the random seed. In our pipeline this - does not guarantee deterministic results with num_parallel_workers > 1. - - Args: - seed(int): seed to be set - - Raises: - ValueError: If seed is invalid (< 0 or > MAX_UINT_32). - - Examples: - >>> import mindspore.dataset as ds - >>> con = ds.engine.ConfigurationManager() - >>> # sets the new seed value, now operators with a random seed will use new seed value. - >>> con.set_seed(1000) - """ - if seed < 0 or seed > UINT32_MAX: - raise ValueError("Seed given is not within the required range") - self.config.set_seed(seed) - random.seed(seed) - # numpy.random isn't thread safe - numpy.random.seed(seed) - - def get_seed(self): - """ - Get the seed - - Returns: - Int, seed. - """ - return self.config.get_seed() - - def set_prefetch_size(self, size): - """ - Set the number of rows to be prefetched. - - Args: - size: total number of rows to be prefetched. - - Raises: - ValueError: If prefetch_size is invalid (<= 0 or > MAX_INT_32). - - Examples: - >>> import mindspore.dataset as ds - >>> con = ds.engine.ConfigurationManager() - >>> # sets the new prefetch value. 
- >>> con.set_prefetch_size(1000) - """ - if size <= 0 or size > INT32_MAX: - raise ValueError("Prefetch size given is not within the required range") - self.config.set_op_connector_size(size) - - def get_prefetch_size(self): - """ - Get the prefetch size in number of rows. - - Returns: - Size, total number of rows to be prefetched. - """ - return self.config.get_op_connector_size() - - def set_num_parallel_workers(self, num): - """ - Set the default number of parallel workers - - Args: - num: number of parallel workers to be used as a default for each operation - - Raises: - ValueError: If num_parallel_workers is invalid (<= 0 or > MAX_INT_32). - - Examples: - >>> import mindspore.dataset as ds - >>> con = ds.engine.ConfigurationManager() - >>> # sets the new parallel_workers value, now parallel dataset operators will run with 8 workers. - >>> con.set_num_parallel_workers(8) - """ - if num <= 0 or num > INT32_MAX: - raise ValueError("Num workers given is not within the required range") - self.config.set_num_parallel_workers(num) - - def get_num_parallel_workers(self): - """ - Get the default number of parallel workers. - - Returns: - Int, number of parallel workers to be used as a default for each operation - """ - return self.config.get_num_parallel_workers() - - def set_monitor_sampling_interval(self, interval): - """ - Set the default interval(ms) of monitor sampling. - - Args: - interval: interval(ms) to be used to performance monitor sampling. - - Raises: - ValueError: If interval is invalid (<= 0 or > MAX_INT_32). - - Examples: - >>> import mindspore.dataset as ds - >>> con = ds.engine.ConfigurationManager() - >>> # sets the new interval value. 
- >>> con.set_monitor_sampling_interval(100) - """ - if interval <= 0 or interval > INT32_MAX: - raise ValueError("Interval given is not within the required range") - self.config.set_monitor_sampling_interval(interval) - - def get_monitor_sampling_interval(self): - """ - Get the default interval of performance monitor sampling. - - Returns: - Interval: interval(ms) of performance monitor sampling. - """ - return self.config.get_monitor_sampling_interval() - - def __str__(self): - """ - String representation of the configurations. - - Returns: - Str, configurations. - """ - return str(self.config) - - def load(self, file): - """ - Load configuration from a file. - - Args: - file: path the config file to be loaded - - Raises: - RuntimeError: If file is invalid and parsing fails. - - Examples: - >>> import mindspore.dataset as ds - >>> con = ds.engine.ConfigurationManager() - >>> # sets the default value according to values in configuration file. - >>> con.load("path/to/config/file") - >>> # example config file: - >>> # { - >>> # "logFilePath": "/tmp", - >>> # "rowsPerBuffer": 32, - >>> # "numParallelWorkers": 4, - >>> # "workerConnectorSize": 16, - >>> # "opConnectorSize": 16, - >>> # "seed": 5489, - >>> # "monitorSamplingInterval": 30 - >>> # } - """ - self.config.load(file) - - -config = ConfigurationManager() diff --git a/mindspore/dataset/core/validator_helpers.py b/mindspore/dataset/core/validator_helpers.py new file mode 100644 index 00000000000..8806babd639 --- /dev/null +++ b/mindspore/dataset/core/validator_helpers.py @@ -0,0 +1,360 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +General Validators. +""" +import inspect +from multiprocessing import cpu_count +import os +import numpy as np +from ..engine import samplers + +# POS_INT_MIN is used to limit values from starting from 0 +POS_INT_MIN = 1 +UINT8_MAX = 255 +UINT8_MIN = 0 +UINT32_MAX = 4294967295 +UINT32_MIN = 0 +UINT64_MAX = 18446744073709551615 +UINT64_MIN = 0 +INT32_MAX = 2147483647 +INT32_MIN = -2147483648 +INT64_MAX = 9223372036854775807 +INT64_MIN = -9223372036854775808 +FLOAT_MAX_INTEGER = 16777216 +FLOAT_MIN_INTEGER = -16777216 +DOUBLE_MAX_INTEGER = 9007199254740992 +DOUBLE_MIN_INTEGER = -9007199254740992 + +valid_detype = [ + "bool", "int8", "int16", "int32", "int64", "uint8", "uint16", + "uint32", "uint64", "float16", "float32", "float64", "string" +] + + +def pad_arg_name(arg_name): + if arg_name != "": + arg_name = arg_name + " " + return arg_name + + +def check_value(value, valid_range, arg_name=""): + arg_name = pad_arg_name(arg_name) + if value < valid_range[0] or value > valid_range[1]: + raise ValueError( + "Input {0}is not within the required interval of ({1} to {2}).".format(arg_name, valid_range[0], + valid_range[1])) + + +def check_range(values, valid_range, arg_name=""): + arg_name = pad_arg_name(arg_name) + if not valid_range[0] <= values[0] <= values[1] <= valid_range[1]: + raise ValueError( + "Input {0}is not within the required interval of ({1} to {2}).".format(arg_name, valid_range[0], + valid_range[1])) + + +def check_positive(value, arg_name=""): + arg_name = 
pad_arg_name(arg_name) + if value <= 0: + raise ValueError("Input {0}must be greater than 0.".format(arg_name)) + + +def check_positive_float(value, arg_name=""): + arg_name = pad_arg_name(arg_name) + type_check(value, (float,), arg_name) + check_positive(value, arg_name) + + +def check_2tuple(value, arg_name=""): + if not (isinstance(value, tuple) and len(value) == 2): + raise ValueError("Value {0}needs to be a 2-tuple.".format(arg_name)) + + +def check_uint8(value, arg_name=""): + type_check(value, (int,), arg_name) + check_value(value, [UINT8_MIN, UINT8_MAX]) + + +def check_uint32(value, arg_name=""): + type_check(value, (int,), arg_name) + check_value(value, [UINT32_MIN, UINT32_MAX]) + + +def check_pos_int32(value, arg_name=""): + type_check(value, (int,), arg_name) + check_value(value, [POS_INT_MIN, INT32_MAX], arg_name) + + +def check_uint64(value, arg_name=""): + type_check(value, (int,), arg_name) + check_value(value, [UINT64_MIN, UINT64_MAX]) + + +def check_pos_int64(value, arg_name=""): + type_check(value, (int,), arg_name) + check_value(value, [UINT64_MIN, INT64_MAX]) + + +def check_pos_float32(value, arg_name=""): + check_value(value, [UINT32_MIN, FLOAT_MAX_INTEGER], arg_name) + + +def check_pos_float64(value, arg_name=""): + check_value(value, [UINT64_MIN, DOUBLE_MAX_INTEGER], arg_name) + + +def check_valid_detype(type_): + if type_ not in valid_detype: + raise ValueError("Unknown column type") + return True + + +def check_columns(columns, name): + """ + Validate strings in column_names. + + Args: + columns (list): list of column_names. + name (str): name of columns. + + Returns: + Exception: when the value is not correct, otherwise nothing. 
+ """ + type_check(columns, (list, str), name) + if isinstance(columns, list): + if not columns: + raise ValueError("{0} should not be empty".format(name)) + for i, column_name in enumerate(columns): + if not column_name: + raise ValueError("{0}[{1}] should not be empty".format(name, i)) + + col_names = ["{0}[{1}]".format(name, i) for i in range(len(columns))] + type_check_list(columns, (str,), col_names) + if len(set(columns)) != len(columns): + raise ValueError("Every column name should not be same with others in column_names.") + + +def parse_user_args(method, *args, **kwargs): + """ + Parse user arguments in a function. + + Args: + method (method): a callable function. + *args: user passed args. + **kwargs: user passed kwargs. + + Returns: + user_filled_args (list): values of what the user passed in for the arguments. + ba.arguments (Ordered Dict): ordered dict of parameter and argument for what the user has passed. + """ + sig = inspect.signature(method) + if 'self' in sig.parameters or 'cls' in sig.parameters: + ba = sig.bind(method, *args, **kwargs) + ba.apply_defaults() + params = list(sig.parameters.keys())[1:] + else: + ba = sig.bind(*args, **kwargs) + ba.apply_defaults() + params = list(sig.parameters.keys()) + + user_filled_args = [ba.arguments.get(arg_value) for arg_value in params] + return user_filled_args, ba.arguments + + +def type_check_list(args, types, arg_names): + """ + Check the type of each parameter in the list. + + Args: + args (list, tuple): a list or tuple of any variable. + types (tuple): tuple of all valid types for arg. + arg_names (list, tuple of str): the names of args. + + Returns: + Exception: when the type is not correct, otherwise nothing. 
+ """ + type_check(args, (list, tuple,), arg_names) + if len(args) != len(arg_names): + raise ValueError("List of arguments is not the same length as argument_names.") + for arg, arg_name in zip(args, arg_names): + type_check(arg, types, arg_name) + + +def type_check(arg, types, arg_name): + """ + Check the type of the parameter. + + Args: + arg : any variable. + types (tuple): tuple of all valid types for arg. + arg_name (str): the name of arg. + + Returns: + Exception: when the type is not correct, otherwise nothing. + """ + # handle special case of booleans being a subclass of ints + print_value = '\"\"' if repr(arg) == repr('') else arg + + if int in types and bool not in types: + if isinstance(arg, bool): + raise TypeError("Argument {0} with value {1} is not of type {2}.".format(arg_name, print_value, types)) + if not isinstance(arg, types): + raise TypeError("Argument {0} with value {1} is not of type {2}.".format(arg_name, print_value, types)) + + +def check_filename(path): + """ + check the filename in the path. + + Args: + path (str): the path. + + Returns: + Exception: when error. 
+ """ + if not isinstance(path, str): + raise TypeError("path: {} is not string".format(path)) + filename = os.path.basename(path) + + # '#', ':', '|', ' ', '}', '"', '+', '!', ']', '[', '\\', '`', + # '&', '.', '/', '@', "'", '^', ',', '_', '<', ';', '~', '>', + # '*', '(', '%', ')', '-', '=', '{', '?', '$' + forbidden_symbols = set(r'\/:*?"<>|`&\';') + + if set(filename) & forbidden_symbols: + raise ValueError(r"filename should not contains \/:*?\"<>|`&;\'") + + if filename.startswith(' ') or filename.endswith(' '): + raise ValueError("filename should not start/end with space") + + return True + + +def check_dir(dataset_dir): + if not os.path.isdir(dataset_dir) or not os.access(dataset_dir, os.R_OK): + raise ValueError("The folder {} does not exist or permission denied!".format(dataset_dir)) + + +def check_file(dataset_file): + check_filename(dataset_file) + if not os.path.isfile(dataset_file) or not os.access(dataset_file, os.R_OK): + raise ValueError("The file {} does not exist or permission denied!".format(dataset_file)) + + +def check_sampler_shuffle_shard_options(param_dict): + """ + Check for valid shuffle, sampler, num_shards, and shard_id inputs. + Args: + param_dict (dict): param_dict. + + Returns: + Exception: ValueError or RuntimeError if error. 
+ """ + shuffle, sampler = param_dict.get('shuffle'), param_dict.get('sampler') + num_shards, shard_id = param_dict.get('num_shards'), param_dict.get('shard_id') + + type_check(sampler, (type(None), samplers.BuiltinSampler, samplers.Sampler), "sampler") + + if sampler is not None: + if shuffle is not None: + raise RuntimeError("sampler and shuffle cannot be specified at the same time.") + if num_shards is not None: + raise RuntimeError("sampler and sharding cannot be specified at the same time.") + + if num_shards is not None: + check_pos_int32(num_shards) + if shard_id is None: + raise RuntimeError("num_shards is specified and currently requires shard_id as well.") + check_value(shard_id, [0, num_shards - 1], "shard_id") + + if num_shards is None and shard_id is not None: + raise RuntimeError("shard_id is specified but num_shards is not.") + + +def check_padding_options(param_dict): + """ + Check for valid padded_sample and num_padded of padded samples. + + Args: + param_dict (dict): param_dict. + + Returns: + Exception: ValueError or RuntimeError if error. 
+ """ + + columns_list = param_dict.get('columns_list') + block_reader = param_dict.get('block_reader') + padded_sample, num_padded = param_dict.get('padded_sample'), param_dict.get('num_padded') + if padded_sample is not None: + if num_padded is None: + raise RuntimeError("padded_sample is specified and requires num_padded as well.") + if num_padded < 0: + raise ValueError("num_padded is invalid, num_padded={}.".format(num_padded)) + if columns_list is None: + raise RuntimeError("padded_sample is specified and requires columns_list as well.") + for column in columns_list: + if column not in padded_sample: + raise ValueError("padded_sample cannot match columns_list.") + if block_reader: + raise RuntimeError("block_reader and padded_sample cannot be specified at the same time.") + + if padded_sample is None and num_padded is not None: + raise RuntimeError("num_padded is specified but padded_sample is not.") + + +def check_num_parallel_workers(value): + type_check(value, (int,), "num_parallel_workers") + if value < 1 or value > cpu_count(): + raise ValueError("num_parallel_workers exceeds the boundary between 1 and {}!".format(cpu_count())) + + +def check_num_samples(value): + type_check(value, (int,), "num_samples") + check_value(value, [0, INT32_MAX], "num_samples") + + +def validate_dataset_param_value(param_list, param_dict, param_type): + for param_name in param_list: + if param_dict.get(param_name) is not None: + if param_name == 'num_parallel_workers': + check_num_parallel_workers(param_dict.get(param_name)) + if param_name == 'num_samples': + check_num_samples(param_dict.get(param_name)) + else: + type_check(param_dict.get(param_name), (param_type,), param_name) + + +def check_gnn_list_or_ndarray(param, param_name): + """ + Check if the input parameter is list or numpy.ndarray. + + Args: + param (list, nd.ndarray): param. + param_name (str): param_name. + + Returns: + Exception: TypeError if error. 
+ """ + + type_check(param, (list, np.ndarray), param_name) + if isinstance(param, list): + param_names = ["param_{0}".format(i) for i in range(len(param))] + type_check_list(param, (int,), param_names) + + elif isinstance(param, np.ndarray): + if not param.dtype == np.int32: + raise TypeError("Each member in {0} should be of type int32. Got {1}.".format( + param_name, param.dtype)) diff --git a/mindspore/dataset/engine/__init__.py b/mindspore/dataset/engine/__init__.py index 674848f156c..b3624e1ca34 100644 --- a/mindspore/dataset/engine/__init__.py +++ b/mindspore/dataset/engine/__init__.py @@ -26,10 +26,9 @@ from .datasets import * from .iterators import * from .serializer_deserializer import serialize, deserialize, show, compare from .samplers import * -from ..core.configuration import config, ConfigurationManager +from ..core import config -__all__ = ["config", "ConfigurationManager", "zip", - "ImageFolderDatasetV2", "MnistDataset", +__all__ = ["config", "zip", "ImageFolderDatasetV2", "MnistDataset", "MindDataset", "GeneratorDataset", "TFRecordDataset", "CLUEDataset", "ManifestDataset", "Cifar10Dataset", "Cifar100Dataset", "CelebADataset", "VOCDataset", "CocoDataset", "TextFileDataset", "Schema", "DistributedSampler", diff --git a/mindspore/dataset/engine/cache_client.py b/mindspore/dataset/engine/cache_client.py new file mode 100644 index 00000000000..800c0dab1de --- /dev/null +++ b/mindspore/dataset/engine/cache_client.py @@ -0,0 +1,49 @@ +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Cache client +""" + +import copy +from mindspore._c_dataengine import CacheClient + +class DatasetCache: + """ + A client to interface with tensor caching service + """ + + def __init__(self, session_id=None, size=None, spilling=False): + if session_id is None: + raise RuntimeError("Session generation is not implemented yet. session id required") + self.size = size if size is not None else 0 + if size < 0: + raise ValueError("cache size should be 0 or positive integer value but got: size={}".format(size)) + if not isinstance(spilling, bool): + raise ValueError( + "spilling argument for cache should be a boolean value but got: spilling={}".format(spilling)) + self.session_id = session_id + self.spilling = spilling + self.cache_client = CacheClient(session_id, size, spilling) + + def __deepcopy__(self, memodict): + if id(self) in memodict: + return memodict[id(self)] + cls = self.__class__ + new_cache = cls.__new__(cls) + memodict[id(self)] = new_cache + new_cache.session_id = copy.deepcopy(self.session_id, memodict) + new_cache.spilling = copy.deepcopy(self.spilling, memodict) + new_cache.size = copy.deepcopy(self.size, memodict) + new_cache.cache_client = self.cache_client + return new_cache diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index ae0dc6789e1..846e7e0a562 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -38,13 +38,13 @@ from mindspore._c_expression import typing from mindspore import log as logger from . 
import samplers -from .iterators import DictIterator, TupleIterator +from .iterators import DictIterator, TupleIterator, DummyIterator from .validators import check_batch, check_shuffle, check_map, check_filter, check_repeat, check_skip, check_zip, \ check_rename, check_numpyslicesdataset, \ check_take, check_project, check_imagefolderdatasetv2, check_mnist_cifar_dataset, check_manifestdataset, \ check_tfrecorddataset, check_vocdataset, check_cocodataset, check_celebadataset, check_minddataset, \ check_generatordataset, check_sync_wait, check_zip_dataset, check_add_column, check_textfiledataset, check_concat, \ - check_split, check_bucket_batch_by_length, check_cluedataset + check_random_dataset, check_split, check_bucket_batch_by_length, check_cluedataset, check_positive_int32 from ..core.datatypes import mstype_to_detype, mstypelist_to_detypelist try: @@ -146,6 +146,12 @@ class Dataset: self._num_classes = None self._repeat_count = None self._sync = False + self.ms_role = os.getenv("MS_ROLE") + + def _noop_mode(self): + if self.ms_role in ("MS_PSERVER", "MS_SCHED"): + return True + return False def __add__(self, datasets): return self.concat(datasets) @@ -386,7 +392,7 @@ class Dataset: @check_map def map(self, input_columns=None, operations=None, output_columns=None, columns_order=None, - num_parallel_workers=None, python_multiprocessing=False): + num_parallel_workers=None, python_multiprocessing=False, cache=None): """ Apply each operation in operations to this dataset. @@ -427,6 +433,7 @@ class Dataset: parallel (default=None, the value from the config will be used). python_multiprocessing (bool, optional): Parallelize python operations with multiple worker process. This option could be beneficial if the python operation is computational heavy (default=False). + cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used) Returns: MapDataset, dataset after mapping operation. 
@@ -541,7 +548,7 @@ class Dataset: >>> ds_mapped = ds_pyfunc.map(input_columns, operations, output_columns, columns_order) """ return MapDataset(self, input_columns, operations, output_columns, columns_order, num_parallel_workers, - python_multiprocessing) + python_multiprocessing, cache) @check_filter def filter(self, predicate, input_columns=None, num_parallel_workers=1): @@ -939,6 +946,7 @@ class Dataset: raise TypeError("apply_func must return a dataset.") return dataset + @check_positive_int32 def device_que(self, prefetch_size=None): """ Return a transferredDataset that transfer data through device. @@ -956,6 +964,7 @@ class Dataset: """ return self.to_device() + @check_positive_int32 def to_device(self, num_batch=None): """ Transfer data through CPU, GPU or Ascend devices. @@ -973,10 +982,14 @@ class Dataset: Raises: TypeError: If device_type is empty. ValueError: If device_type is not 'Ascend', 'GPU' or 'CPU'. - ValueError: If num_batch is None or 0 or larger than int_max. + ValueError: If num_batch is not positive or larger than int_max. + ValueError: If dataset size is None or 0. RuntimeError: If dataset is unknown. RuntimeError: If distribution file path is given but failed to read. 
""" + if self.get_dataset_size() is None or 0: + raise ValueError("dataset size is None or 0.") + if num_batch is None: num_batch = self.get_dataset_size() repeat_count = self.get_repeat_count() @@ -995,8 +1008,8 @@ class Dataset: if device_type not in ('Ascend', 'GPU', 'CPU'): raise ValueError("Only support CPU, Ascend, GPU") - if num_batch is None or num_batch == 0: - raise ValueError("num_batch is None or 0.") + if num_batch == 0: + raise ValueError("num_batch is 0.") def get_distribution(output_dataset): dev_id = 0 @@ -1055,6 +1068,8 @@ class Dataset: >>> # convert the returned tuple to a list and print >>> print(list(item)) """ + if self._noop_mode(): + return DummyIterator(self, 'tuple') return TupleIterator(self, columns) def create_dict_iterator(self): @@ -1078,6 +1093,8 @@ class Dataset: >>> print(item["column1"]) """ + if self._noop_mode(): + return DummyIterator(self, 'dict') return DictIterator(self) def __iter__(self): @@ -1556,7 +1573,7 @@ class BatchDataset(DatasetOp): Number, number of batches. """ child_size = self.children[0].get_dataset_size() - if child_size is not None: + if child_size is not None and isinstance(self.batch_size, int): if self.drop_remainder: return math.floor(child_size / self.batch_size) return math.ceil(child_size / self.batch_size) @@ -1862,13 +1879,14 @@ class MapDataset(DatasetOp): in parallel (default=None). python_multiprocessing (bool, optional): Parallelize python operations with multiple worker process. This option could be beneficial if the python operation is computational heavy (default=False). + cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used) Raises: ValueError: If len(input_columns) != len(output_columns) and columns_order is not specified. 
""" def __init__(self, input_dataset, input_columns=None, operations=None, output_columns=None, columns_order=None, - num_parallel_workers=None, python_multiprocessing=False): + num_parallel_workers=None, python_multiprocessing=False, cache=None): super().__init__(num_parallel_workers) self.children.append(input_dataset) if input_columns is not None and not isinstance(input_columns, list): @@ -1880,6 +1898,7 @@ class MapDataset(DatasetOp): if output_columns is not None and not isinstance(output_columns, list): output_columns = [output_columns] self.output_columns = output_columns + self.cache = cache self.columns_order = columns_order if self.input_columns and self.output_columns \ @@ -1898,6 +1917,7 @@ class MapDataset(DatasetOp): args["operations"] = self.operations args["output_columns"] = self.output_columns args["columns_order"] = self.columns_order + args["cache"] = self.cache.cache_client if self.cache is not None else None return args def get_dataset_size(self): @@ -1923,6 +1943,7 @@ class MapDataset(DatasetOp): new_op.parent = copy.deepcopy(self.parent, memodict) new_op.input_indexs = copy.deepcopy(self._input_indexs, memodict) new_op.python_multiprocessing = copy.deepcopy(self.python_multiprocessing, memodict) + new_op.cache = copy.deepcopy(self.cache, memodict) new_op.operations = self.operations return new_op @@ -2307,6 +2328,8 @@ class TransferDataset(DatasetOp): def send(self): # need to keep iterator alive so the executionTree is not destroyed + if self._noop_mode(): + return self.iterator = TupleIterator(self) @@ -2340,7 +2363,7 @@ class RangeDataset(MappableDataset): return False -def _select_sampler(num_samples, input_sampler, shuffle, num_shards, shard_id): +def _select_sampler(num_samples, input_sampler, shuffle, num_shards, shard_id, non_mappable=False): """ Create sampler based on user input. @@ -2350,7 +2373,11 @@ def _select_sampler(num_samples, input_sampler, shuffle, num_shards, shard_id): shuffle (bool): Shuffle. 
num_shards (int): Number of shard for sharding. shard_id (int): Shard ID. + non_mappable (bool, optional): Indicate if caller is non-mappable dataset for special handling (default=False). """ + if non_mappable is True and all(arg is None for arg in [num_samples, shuffle, num_shards, shard_id, input_sampler]): + return None + if input_sampler is not None: # If the user provided a sampler, then it doesn't matter what the other args are because # we are being asked specifically to use the given sampler. @@ -2363,7 +2390,7 @@ def _select_sampler(num_samples, input_sampler, shuffle, num_shards, shard_id): if (isinstance(input_sampler, (samplers.SequentialSampler, samplers.DistributedSampler, samplers.RandomSampler, samplers.SubsetRandomSampler, samplers.WeightedRandomSampler, samplers.Sampler)) and - (num_shards is not None or shard_id is not None or shuffle is not None or num_samples is not None)): + (any(arg is not None for arg in [num_shards, shard_id, shuffle, num_samples]))): raise ValueError( 'Conflicting arguments during sampler assignments. num_samples: {}, num_shards: {},' ' shard_id: {}, shuffle: {})'.format(num_samples, num_shards, shard_id, shuffle)) @@ -2452,6 +2479,7 @@ class ImageFolderDatasetV2(MappableDataset): into (default=None). shard_id (int, optional): The shard ID within num_shards (default=None). This argument should be specified only when num_shards is also specified. + cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used) Raises: RuntimeError: If sampler and shuffle are specified at the same time. 
@@ -2476,7 +2504,7 @@ class ImageFolderDatasetV2(MappableDataset): @check_imagefolderdatasetv2 def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=None, sampler=None, extensions=None, class_indexing=None, - decode=False, num_shards=None, shard_id=None): + decode=False, num_shards=None, shard_id=None, cache=None): super().__init__(num_parallel_workers) self.dataset_dir = dataset_dir @@ -2488,6 +2516,7 @@ class ImageFolderDatasetV2(MappableDataset): self.decode = decode self.num_shards = num_shards self.shard_id = shard_id + self.cache = cache def get_args(self): args = super().get_args() @@ -2500,6 +2529,7 @@ class ImageFolderDatasetV2(MappableDataset): args["decode"] = self.decode args["num_shards"] = self.num_shards args["shard_id"] = self.shard_id + args["cache"] = self.cache.cache_client if self.cache is not None else None return args def get_dataset_size(self): @@ -3245,6 +3275,7 @@ class TFRecordDataset(SourceDataset): argument should be specified only when num_shards is also specified. shard_equal_rows (bool): Get equal rows for all shards(default=False). If shard_equal_rows is false, number of rows of each shard may be not equal. + cache (DatasetCache, optional): Tensor cache to use. 
(default=None which means no cache is used) Examples: >>> import mindspore.dataset as ds >>> import mindspore.common.dtype as mstype @@ -3262,7 +3293,7 @@ class TFRecordDataset(SourceDataset): @check_tfrecorddataset def __init__(self, dataset_files, schema=None, columns_list=None, num_samples=None, num_parallel_workers=None, - shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None, shard_equal_rows=False): + shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None, shard_equal_rows=False, cache=None): super().__init__(num_parallel_workers) self.dataset_files = self._find_files(dataset_files) self.dataset_files.sort() @@ -3274,6 +3305,7 @@ class TFRecordDataset(SourceDataset): self.schema = schema self.columns_list = columns_list self.num_samples = num_samples + self.cache = cache if schema_obj is not None and num_samples is None: self.num_samples = schema_obj.num_rows @@ -3289,6 +3321,14 @@ class TFRecordDataset(SourceDataset): else: self.shuffle_level = shuffle self.shuffle_files = True + + # The TF record dataset does not directly support a sampler. It has provided sampling arguments + # (shuffle, num_samples, num_shards, shard_id) and it DOES support sampling if somewhere above it in + # the pipeline contains a cache. If there is no cache above it, then this sampler is not used. + sampler_shuffle = self.shuffle_files + sampler = None + self.sampler = _select_sampler(self.num_samples, sampler, sampler_shuffle, num_shards, shard_id, + non_mappable=True) self.shard_equal_rows = shard_equal_rows def get_args(self): @@ -3312,6 +3352,8 @@ class TFRecordDataset(SourceDataset): args["num_shards"] = self.num_shards args["shard_id"] = self.shard_id args["shard_equal_rows"] = self.shard_equal_rows + args["cache"] = self.cache.cache_client if self.cache is not None else None + args["sampler"] = self.sampler return args def get_dataset_size(self, estimate=False): @@ -3797,43 +3839,61 @@ class RandomDataset(SourceDataset): A source dataset that generates random data. 
Args: - num_samples (int): number of samples to generate. + total_rows (int): number of rows for the dataset to generate (default=None, number of rows is random) schema (str or Schema, optional): Path to the json schema file or schema object (default=None). If the schema is not provided, the random dataset generates a random schema. columns_list (list[str], optional): List of columns to be read (default=None, read all columns) + num_samples (int): number of samples to draw from the total. (default=None, which means all rows) num_parallel_workers (int, optional): number of workers to read the data (default=None, number set in the config). + cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used) + shuffle (bool, optional): Whether or not to perform shuffle on the dataset + (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset should be divided + into (default=None). + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument should be specified only when num_shards is also specified. 
""" - def __init__(self, schema=None, columns_list=None, num_samples=None, num_parallel_workers=None): + @check_random_dataset + def __init__(self, total_rows=None, schema=None, columns_list=None, num_samples=None, num_parallel_workers=None, + cache=None, shuffle=None, num_shards=None, shard_id=None): super().__init__(num_parallel_workers) schema_obj = None if (schema is not None) and (not isinstance(schema, Schema)): schema_obj = Schema(schema) # read the schema file and convert to schema object to validate it self.schema = schema self.columns_list = columns_list - if schema_obj is not None and num_samples is None: - self.num_samples = schema_obj.num_rows - elif num_samples is None: - self.num_samples = 0 + sampler = None + self.sampler = _select_sampler(num_samples, sampler, shuffle, num_shards, shard_id, non_mappable=True) + self.num_samples = num_samples + self.cache = cache + if schema_obj is not None and total_rows is None: + self.total_rows = schema_obj.num_rows + elif total_rows is None: + self.total_rows = 0 else: - self.num_samples = num_samples + self.total_rows = total_rows + self.num_shards = num_shards + self.shard_id = shard_id + self.shuffle_level = shuffle def get_args(self): args = super().get_args() if self.schema is not None: if isinstance(self.schema, Schema): self.schema.datasetType = 'Random' - if self.num_samples is not None: - self.schema.num_rows = self.num_samples + if self.total_rows is not None: + self.schema.num_rows = self.total_rows args["schema_json_string"] = self.schema.to_json() else: args["schema_file_path"] = self.schema args["schema"] = self.schema - if self.columns_list is not None: - args["columns_list"] = self.columns_list - if self.num_samples is not None: - args["num_samples"] = self.num_samples + args["columns_list"] = self.columns_list + args["num_samples"] = self.num_samples + args["total_rows"] = self.total_rows + args["cache"] = self.cache.cache_client if self.cache is not None else None + args["sampler"] = 
self.sampler return args def get_dataset_size(self): @@ -3843,18 +3903,28 @@ class RandomDataset(SourceDataset): Return: Number, number of batches. """ + + num_rows = CifarOp.get_num_rows(self.dataset_dir, True) + + rows_per_shard = get_num_rows(num_rows, self.num_shards) rows_from_sampler = self._get_sampler_dataset_size() if rows_from_sampler is None: - return self.num_samples + return rows_per_shard - return min(rows_from_sampler, self.num_samples) + return min(rows_from_sampler, rows_per_shard) def is_shuffled(self): - return True + if self.shuffle_level is None: + return True + + return self.shuffle_level or self.sampler.is_shuffled() def is_sharded(self): - return False + if self.num_shards is not None: + return self.num_shards > 1 + + return self.sampler.is_sharded() class Schema: diff --git a/mindspore/dataset/engine/graphdata.py b/mindspore/dataset/engine/graphdata.py index 472819784e9..81314b43734 100644 --- a/mindspore/dataset/engine/graphdata.py +++ b/mindspore/dataset/engine/graphdata.py @@ -22,7 +22,8 @@ from mindspore._c_dataengine import Tensor from .validators import check_gnn_graphdata, check_gnn_get_all_nodes, check_gnn_get_all_edges, \ check_gnn_get_nodes_from_edges, check_gnn_get_all_neighbors, check_gnn_get_sampled_neighbors, \ - check_gnn_get_neg_sampled_neighbors, check_gnn_get_node_feature, check_gnn_random_walk + check_gnn_get_neg_sampled_neighbors, check_gnn_get_node_feature, check_gnn_get_edge_feature, \ + check_gnn_random_walk class GraphData: @@ -127,7 +128,13 @@ class GraphData: @check_gnn_get_sampled_neighbors def get_sampled_neighbors(self, node_list, neighbor_nums, neighbor_types): """ - Get sampled neighbor information, maximum support 6-hop sampling. + Get sampled neighbor information. + + The api supports multi-hop neighbor sampling. That is, the previous sampling result is used as the input of + next-hop sampling. A maximum of 6-hop are allowed. 
+ + The sampling result is tiled into a list in the format of [input node, 1-hop sampling result, + 2-hop samling result ...] Args: node_list (list or numpy.ndarray): The given list of nodes. @@ -207,6 +214,35 @@ class GraphData: Tensor(node_list), feature_types)] + @check_gnn_get_edge_feature + def get_edge_feature(self, edge_list, feature_types): + """ + Get `feature_types` feature of the edges in `edge_list`. + + Args: + edge_list (list or numpy.ndarray): The given list of edges. + feature_types (list or ndarray): The given list of feature types. + + Returns: + numpy.ndarray: array of features. + + Examples: + >>> import mindspore.dataset as ds + >>> data_graph = ds.GraphData('dataset_file', 2) + >>> edges = data_graph.get_all_edges(0) + >>> features = data_graph.get_edge_feature(edges, [1]) + + Raises: + TypeError: If `edge_list` is not list or ndarray. + TypeError: If `feature_types` is not list or ndarray. + """ + if isinstance(edge_list, list): + edge_list = np.array(edge_list, dtype=np.int32) + return [ + t.as_array() for t in self._graph.get_edge_feature( + Tensor(edge_list), + feature_types)] + def graph_info(self): """ Get the meta information of the graph, including the number of nodes, the type of nodes, @@ -232,9 +268,10 @@ class GraphData: Args: target_nodes (list[int]): Start node list in random walk meta_path (list[int]): node type for each walk step - step_home_param (float): return hyper parameter in node2vec algorithm - step_away_param (float): inout hyper parameter in node2vec algorithm - default_node (int): default node if no more neighbors found + step_home_param (float, optional): return hyper parameter in node2vec algorithm (Default = 1.0). + step_away_param (float, optional): inout hyper parameter in node2vec algorithm (Default = 1.0). + default_node (int, optional): default node if no more neighbors found (Default = -1). + A default value of -1 indicates that no node is given. Returns: numpy.ndarray: array of nodes. 
diff --git a/mindspore/dataset/engine/iterators.py b/mindspore/dataset/engine/iterators.py index 1d2d28c1c04..a2a23cbb44c 100644 --- a/mindspore/dataset/engine/iterators.py +++ b/mindspore/dataset/engine/iterators.py @@ -17,7 +17,9 @@ from abc import abstractmethod import copy import weakref +import numpy as np +from mindspore.common.tensor import Tensor from mindspore._c_dataengine import DEPipeline from mindspore._c_dataengine import OpName @@ -287,3 +289,32 @@ class TupleIterator(Iterator): """ return [t.as_array() for t in self.depipeline.GetNextAsList()] + + +class DummyIterator(): + """ + A DummyIterator only work when env MS_ROLE="MS_PSERVER" or MS_ROLE="MS_SCHED" + """ + def __init__(self, dataset, mode): + self.mode = mode + self.shapes = dataset.output_shapes() + self.types = dataset.output_types() + self.fetched_first = False + + def __get_tensor(self): + tensor_row = [] + for np_shape, np_type in zip(self.shapes, self.types): + input_np = np.zeros(np_shape, np_type) + tensor = Tensor(input_np) + tensor_row.append(tensor) + return tensor_row + + def __iter__(self): + return self + + def __next__(self): + if self.mode == "tuple": + if not self.fetched_first: + self.fetched_first = True + return self.__get_tensor() + raise StopIteration() diff --git a/mindspore/dataset/engine/serializer_deserializer.py b/mindspore/dataset/engine/serializer_deserializer.py index 9d3339e26d2..8fd3a2bb9be 100644 --- a/mindspore/dataset/engine/serializer_deserializer.py +++ b/mindspore/dataset/engine/serializer_deserializer.py @@ -22,7 +22,7 @@ import sys from mindspore import log as logger from . 
import datasets as de from ..transforms.vision.utils import Inter, Border -from ..core.configuration import config +from ..core import config def serialize(dataset, json_filepath=None): """ @@ -173,7 +173,9 @@ def traverse(node): # num_samples, shard_id, num_shards, shuffle # These arguments get moved into the sampler itself, so they are no longer needed to # be set at the dataset level. - if 'sampler' in node_args.keys(): + # TF Record is a special case because it uses both the dataset and sampler arguments + # which is not decided until later during tree preparation phase. + if node_repr['op_type'] != 'TFRecordDataset' and 'sampler' in node_args.keys(): if 'num_samples' in node_repr.keys(): node_repr['num_samples'] = None if 'shuffle' in node_repr.keys(): diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py index 744a9b94be7..29904f1a9ef 100644 --- a/mindspore/dataset/engine/validators.py +++ b/mindspore/dataset/engine/validators.py @@ -9,335 +9,151 @@ # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and +# See the License foNtest_resr the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Built-in validators. +""" +Built-in validators. 
""" import inspect as ins import os from functools import wraps -from multiprocessing import cpu_count import numpy as np from mindspore._c_expression import typing +from ..core.validator_helpers import parse_user_args, type_check, type_check_list, check_value, \ + INT32_MAX, check_valid_detype, check_dir, check_file, check_sampler_shuffle_shard_options, \ + validate_dataset_param_value, check_padding_options, check_gnn_list_or_ndarray, check_num_parallel_workers, \ + check_columns, check_pos_int32 from . import datasets from . import samplers +from . import cache_client -INT32_MAX = 2147483647 -valid_detype = [ - "bool", "int8", "int16", "int32", "int64", "uint8", "uint16", - "uint32", "uint64", "float16", "float32", "float64", "string" -] - - -def check_valid_detype(type_): - if type_ not in valid_detype: - raise ValueError("Unknown column type") - return True - - -def check_filename(path): - """ - check the filename in the path - - Args: - path (str): the path - - Returns: - Exception: when error - """ - if not isinstance(path, str): - raise TypeError("path: {} is not string".format(path)) - filename = os.path.basename(path) - - # '#', ':', '|', ' ', '}', '"', '+', '!', ']', '[', '\\', '`', - # '&', '.', '/', '@', "'", '^', ',', '_', '<', ';', '~', '>', - # '*', '(', '%', ')', '-', '=', '{', '?', '$' - forbidden_symbols = set(r'\/:*?"<>|`&\';') - - if set(filename) & forbidden_symbols: - raise ValueError(r"filename should not contains \/:*?\"<>|`&;\'") - - if filename.startswith(' ') or filename.endswith(' '): - raise ValueError("filename should not start/end with space") - - return True - - -def make_param_dict(method, args, kwargs): - """Return a dictionary of the method's args and kwargs.""" - sig = ins.signature(method) - params = sig.parameters - keys = list(params.keys()) - param_dict = dict() - try: - for name, value in enumerate(args): - param_dict[keys[name]] = value - except IndexError: - raise TypeError("{0}() expected {1} arguments, but {2} were 
given".format( - method.__name__, len(keys) - 1, len(args) - 1)) - - param_dict.update(zip(params.keys(), args)) - param_dict.update(kwargs) - - for name, value in params.items(): - if name not in param_dict: - param_dict[name] = value.default - return param_dict - - -def check_type(param, param_name, valid_type): - if (not isinstance(param, valid_type)) or (valid_type == int and isinstance(param, bool)): - raise TypeError("Wrong input type for {0}, should be {1}, got {2}".format(param_name, valid_type, type(param))) - - -def check_param_type(param_list, param_dict, param_type): - for param_name in param_list: - if param_dict.get(param_name) is not None: - if param_name == 'num_parallel_workers': - check_num_parallel_workers(param_dict.get(param_name)) - if param_name == 'num_samples': - check_num_samples(param_dict.get(param_name)) - else: - check_type(param_dict.get(param_name), param_name, param_type) - - -def check_positive_int32(param, param_name): - check_interval_closed(param, param_name, [1, INT32_MAX]) - - -def check_interval_closed(param, param_name, valid_range): - if param < valid_range[0] or param > valid_range[1]: - raise ValueError("The value of {0} exceeds the closed interval range {1}.".format(param_name, valid_range)) - - -def check_num_parallel_workers(value): - check_type(value, 'num_parallel_workers', int) - if value < 1 or value > cpu_count(): - raise ValueError("num_parallel_workers exceeds the boundary between 1 and {}!".format(cpu_count())) - - -def check_num_samples(value): - check_type(value, 'num_samples', int) - if value < 0: - raise ValueError("num_samples cannot be less than 0!") - - -def check_dataset_dir(dataset_dir): - if not os.path.isdir(dataset_dir) or not os.access(dataset_dir, os.R_OK): - raise ValueError("The folder {} does not exist or permission denied!".format(dataset_dir)) - - -def check_dataset_file(dataset_file): - check_filename(dataset_file) - if not os.path.isfile(dataset_file) or not os.access(dataset_file, 
os.R_OK): - raise ValueError("The file {} does not exist or permission denied!".format(dataset_file)) - - -def check_sampler_shuffle_shard_options(param_dict): - """check for valid shuffle, sampler, num_shards, and shard_id inputs.""" - shuffle, sampler = param_dict.get('shuffle'), param_dict.get('sampler') - num_shards, shard_id = param_dict.get('num_shards'), param_dict.get('shard_id') - - if sampler is not None and not isinstance(sampler, (samplers.BuiltinSampler, samplers.Sampler)): - raise TypeError("sampler is not a valid Sampler type.") - - if sampler is not None: - if shuffle is not None: - raise RuntimeError("sampler and shuffle cannot be specified at the same time.") - - if num_shards is not None: - raise RuntimeError("sampler and sharding cannot be specified at the same time.") - - if num_shards is not None: - check_positive_int32(num_shards, "num_shards") - if shard_id is None: - raise RuntimeError("num_shards is specified and currently requires shard_id as well.") - if shard_id < 0 or shard_id >= num_shards: - raise ValueError("shard_id is invalid, shard_id={}".format(shard_id)) - - if num_shards is None and shard_id is not None: - raise RuntimeError("shard_id is specified but num_shards is not.") - - -def check_padding_options(param_dict): - """ check for valid padded_sample and num_padded of padded samples""" - columns_list = param_dict.get('columns_list') - block_reader = param_dict.get('block_reader') - padded_sample, num_padded = param_dict.get('padded_sample'), param_dict.get('num_padded') - if padded_sample is not None: - if num_padded is None: - raise RuntimeError("padded_sample is specified and requires num_padded as well.") - if num_padded < 0: - raise ValueError("num_padded is invalid, num_padded={}.".format(num_padded)) - if columns_list is None: - raise RuntimeError("padded_sample is specified and requires columns_list as well.") - for column in columns_list: - if column not in padded_sample: - raise ValueError("padded_sample cannot match 
columns_list.") - if block_reader: - raise RuntimeError("block_reader and padded_sample cannot be specified at the same time.") - - if padded_sample is None and num_padded is not None: - raise RuntimeError("num_padded is specified but padded_sample is not.") def check_imagefolderdatasetv2(method): - """A wrapper that wrap a parameter checker to the original Dataset(ImageFolderDatasetV2).""" + """A wrapper that wraps a parameter checker to the original Dataset(ImageFolderDatasetV2).""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] nreq_param_bool = ['shuffle', 'decode'] nreq_param_list = ['extensions'] nreq_param_dict = ['class_indexing'] - # check dataset_dir; required argument dataset_dir = param_dict.get('dataset_dir') - if dataset_dir is None: - raise ValueError("dataset_dir is not provided.") - check_dataset_dir(dataset_dir) - - check_param_type(nreq_param_int, param_dict, int) - - check_param_type(nreq_param_bool, param_dict, bool) - - check_param_type(nreq_param_list, param_dict, list) - - check_param_type(nreq_param_dict, param_dict, dict) + check_dir(dataset_dir) + validate_dataset_param_value(nreq_param_int, param_dict, int) + validate_dataset_param_value(nreq_param_bool, param_dict, bool) + validate_dataset_param_value(nreq_param_list, param_dict, list) + validate_dataset_param_value(nreq_param_dict, param_dict, dict) check_sampler_shuffle_shard_options(param_dict) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_mnist_cifar_dataset(method): - """A wrapper that wrap a parameter checker to the original Dataset(ManifestDataset, Cifar10/100Dataset).""" + """A wrapper that wraps a parameter checker to the original Dataset(ManifestDataset, Cifar10/100Dataset).""" 
@wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] nreq_param_bool = ['shuffle'] - # check dataset_dir; required argument dataset_dir = param_dict.get('dataset_dir') - if dataset_dir is None: - raise ValueError("dataset_dir is not provided.") - check_dataset_dir(dataset_dir) + check_dir(dataset_dir) - check_param_type(nreq_param_int, param_dict, int) - - check_param_type(nreq_param_bool, param_dict, bool) + validate_dataset_param_value(nreq_param_int, param_dict, int) + validate_dataset_param_value(nreq_param_bool, param_dict, bool) check_sampler_shuffle_shard_options(param_dict) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_manifestdataset(method): - """A wrapper that wrap a parameter checker to the original Dataset(ManifestDataset).""" + """A wrapper that wraps a parameter checker to the original Dataset(ManifestDataset).""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] nreq_param_bool = ['shuffle', 'decode'] nreq_param_str = ['usage'] nreq_param_dict = ['class_indexing'] - # check dataset_file; required argument dataset_file = param_dict.get('dataset_file') - if dataset_file is None: - raise ValueError("dataset_file is not provided.") - check_dataset_file(dataset_file) + check_file(dataset_file) - check_param_type(nreq_param_int, param_dict, int) - - check_param_type(nreq_param_bool, param_dict, bool) - - check_param_type(nreq_param_str, param_dict, str) - - check_param_type(nreq_param_dict, param_dict, dict) + 
validate_dataset_param_value(nreq_param_int, param_dict, int) + validate_dataset_param_value(nreq_param_bool, param_dict, bool) + validate_dataset_param_value(nreq_param_str, param_dict, str) + validate_dataset_param_value(nreq_param_dict, param_dict, dict) check_sampler_shuffle_shard_options(param_dict) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_tfrecorddataset(method): - """A wrapper that wrap a parameter checker to the original Dataset(TFRecordDataset).""" + """A wrapper that wraps a parameter checker to the original Dataset(TFRecordDataset).""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] nreq_param_list = ['columns_list'] nreq_param_bool = ['shard_equal_rows'] - # check dataset_files; required argument dataset_files = param_dict.get('dataset_files') - if dataset_files is None: - raise ValueError("dataset_files is not provided.") if not isinstance(dataset_files, (str, list)): raise TypeError("dataset_files should be of type str or a list of strings.") - check_param_type(nreq_param_int, param_dict, int) - - check_param_type(nreq_param_list, param_dict, list) - - check_param_type(nreq_param_bool, param_dict, bool) + validate_dataset_param_value(nreq_param_int, param_dict, int) + validate_dataset_param_value(nreq_param_list, param_dict, list) + validate_dataset_param_value(nreq_param_bool, param_dict, bool) check_sampler_shuffle_shard_options(param_dict) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_vocdataset(method): - """A wrapper that wrap a parameter checker to the original Dataset(VOCDataset).""" + """A wrapper that wraps a parameter checker to the original Dataset(VOCDataset).""" @wraps(method) - def 
new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] nreq_param_bool = ['shuffle', 'decode'] nreq_param_dict = ['class_indexing'] - # check dataset_dir; required argument dataset_dir = param_dict.get('dataset_dir') - if dataset_dir is None: - raise ValueError("dataset_dir is not provided.") - check_dataset_dir(dataset_dir) - # check task; required argument - task = param_dict.get('task') - if task is None: - raise ValueError("task is not provided.") - if not isinstance(task, str): - raise TypeError("task is not str type.") - # check mode; required argument - mode = param_dict.get('mode') - if mode is None: - raise ValueError("mode is not provided.") - if not isinstance(mode, str): - raise TypeError("mode is not str type.") + check_dir(dataset_dir) + + task = param_dict.get('task') + type_check(task, (str,), "task") + + mode = param_dict.get('mode') + type_check(mode, (str,), "mode") - imagesets_file = "" if task == "Segmentation": imagesets_file = os.path.join(dataset_dir, "ImageSets", "Segmentation", mode + ".txt") if param_dict.get('class_indexing') is not None: @@ -347,92 +163,74 @@ def check_vocdataset(method): else: raise ValueError("Invalid task : " + task) - check_dataset_file(imagesets_file) - - check_param_type(nreq_param_int, param_dict, int) - - check_param_type(nreq_param_bool, param_dict, bool) - - check_param_type(nreq_param_dict, param_dict, dict) + check_file(imagesets_file) + validate_dataset_param_value(nreq_param_int, param_dict, int) + validate_dataset_param_value(nreq_param_bool, param_dict, bool) + validate_dataset_param_value(nreq_param_dict, param_dict, dict) check_sampler_shuffle_shard_options(param_dict) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_cocodataset(method): - """A 
wrapper that wrap a parameter checker to the original Dataset(CocoDataset).""" + """A wrapper that wraps a parameter checker to the original Dataset(CocoDataset).""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] nreq_param_bool = ['shuffle', 'decode'] - # check dataset_dir; required argument dataset_dir = param_dict.get('dataset_dir') - if dataset_dir is None: - raise ValueError("dataset_dir is not provided.") - check_dataset_dir(dataset_dir) + check_dir(dataset_dir) - # check annotation_file; required argument annotation_file = param_dict.get('annotation_file') - if annotation_file is None: - raise ValueError("annotation_file is not provided.") - check_dataset_file(annotation_file) + check_file(annotation_file) - # check task; required argument task = param_dict.get('task') - if task is None: - raise ValueError("task is not provided.") - if not isinstance(task, str): - raise TypeError("task is not str type.") + type_check(task, (str,), "task") if task not in {'Detection', 'Stuff', 'Panoptic', 'Keypoint'}: raise ValueError("Invalid task type") - check_param_type(nreq_param_int, param_dict, int) + validate_dataset_param_value(nreq_param_int, param_dict, int) - check_param_type(nreq_param_bool, param_dict, bool) + validate_dataset_param_value(nreq_param_bool, param_dict, bool) sampler = param_dict.get('sampler') if sampler is not None and isinstance(sampler, samplers.PKSampler): raise ValueError("CocoDataset doesn't support PKSampler") check_sampler_shuffle_shard_options(param_dict) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_celebadataset(method): - """A wrapper that wrap a parameter checker to the original Dataset(CelebADataset).""" + """A wrapper that wraps a parameter 
checker to the original Dataset(CelebADataset).""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] nreq_param_bool = ['shuffle', 'decode'] nreq_param_list = ['extensions'] nreq_param_str = ['dataset_type'] - # check dataset_dir; required argument dataset_dir = param_dict.get('dataset_dir') - if dataset_dir is None: - raise ValueError("dataset_dir is not provided.") - check_dataset_dir(dataset_dir) - check_param_type(nreq_param_int, param_dict, int) + check_dir(dataset_dir) - check_param_type(nreq_param_bool, param_dict, bool) - - check_param_type(nreq_param_list, param_dict, list) - - check_param_type(nreq_param_str, param_dict, str) + validate_dataset_param_value(nreq_param_int, param_dict, int) + validate_dataset_param_value(nreq_param_bool, param_dict, bool) + validate_dataset_param_value(nreq_param_list, param_dict, list) + validate_dataset_param_value(nreq_param_str, param_dict, str) dataset_type = param_dict.get('dataset_type') if dataset_type is not None and dataset_type not in ('all', 'train', 'valid', 'test'): @@ -444,67 +242,58 @@ def check_celebadataset(method): if sampler is not None and isinstance(sampler, samplers.PKSampler): raise ValueError("CelebADataset does not support PKSampler.") - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_minddataset(method): - """A wrapper that wrap a parameter checker to the original Dataset(MindDataset).""" + """A wrapper that wraps a parameter checker to the original Dataset(MindDataset).""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) nreq_param_int = ['num_samples', 
'num_parallel_workers', 'seed', 'num_shards', 'shard_id', 'num_padded'] nreq_param_list = ['columns_list'] nreq_param_bool = ['block_reader'] nreq_param_dict = ['padded_sample'] - # check dataset_file; required argument dataset_file = param_dict.get('dataset_file') - if dataset_file is None: - raise ValueError("dataset_file is not provided.") if isinstance(dataset_file, list): for f in dataset_file: - check_dataset_file(f) + check_file(f) else: - check_dataset_file(dataset_file) + check_file(dataset_file) - check_param_type(nreq_param_int, param_dict, int) - - check_param_type(nreq_param_list, param_dict, list) - - check_param_type(nreq_param_bool, param_dict, bool) - - check_param_type(nreq_param_dict, param_dict, dict) + validate_dataset_param_value(nreq_param_int, param_dict, int) + validate_dataset_param_value(nreq_param_list, param_dict, list) + validate_dataset_param_value(nreq_param_bool, param_dict, bool) + validate_dataset_param_value(nreq_param_dict, param_dict, dict) check_sampler_shuffle_shard_options(param_dict) check_padding_options(param_dict) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_generatordataset(method): - """A wrapper that wrap a parameter checker to the original Dataset(GeneratorDataset).""" + """A wrapper that wraps a parameter checker to the original Dataset(GeneratorDataset).""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) - # check generator_function; required argument source = param_dict.get('source') - if source is None: - raise ValueError("source is not provided.") + if not callable(source): try: iter(source) except TypeError: raise TypeError("source should be callable, iterable or random accessible") - # check column_names or schema; required argument column_names = param_dict.get('column_names') if column_names is not 
None: check_columns(column_names, "column_names") @@ -518,11 +307,11 @@ def check_generatordataset(method): # check optional argument nreq_param_int = ["num_samples", "num_parallel_workers", "num_shards", "shard_id"] - check_param_type(nreq_param_int, param_dict, int) + validate_dataset_param_value(nreq_param_int, param_dict, int) nreq_param_list = ["column_types"] - check_param_type(nreq_param_list, param_dict, list) + validate_dataset_param_value(nreq_param_list, param_dict, list) nreq_param_bool = ["shuffle"] - check_param_type(nreq_param_bool, param_dict, bool) + validate_dataset_param_value(nreq_param_bool, param_dict, bool) num_shards = param_dict.get("num_shards") shard_id = param_dict.get("shard_id") @@ -530,9 +319,9 @@ def check_generatordataset(method): # These two parameters appear together. raise ValueError("num_shards and shard_id need to be passed in together") if num_shards is not None: - check_positive_int32(num_shards, "num_shards") + check_pos_int32(num_shards, "num_shards") if shard_id >= num_shards: - raise ValueError("shard_id should be less than num_shards") + raise ValueError("shard_id should be less than num_shards.") sampler = param_dict.get("sampler") if sampler is not None: @@ -551,81 +340,73 @@ def check_generatordataset(method): if num_shards is not None and not hasattr(source, "__getitem__"): raise ValueError("num_shards is not supported if source does not have attribute '__getitem__'") - return method(*args, **kwargs) + return method(self, *args, **kwargs) + + return new_method + +def check_random_dataset(method): + """A wrapper that wraps a parameter checker to the original Dataset(RandomDataset).""" + + @wraps(method) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) + + nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id', 'total_rows'] + nreq_param_bool = ['shuffle'] + nreq_param_list = ['columns_list'] + + validate_dataset_param_value(nreq_param_int, 
param_dict, int) + validate_dataset_param_value(nreq_param_bool, param_dict, bool) + validate_dataset_param_value(nreq_param_list, param_dict, list) + + check_sampler_shuffle_shard_options(param_dict) + + return method(self, *args, **kwargs) return new_method -def check_batch_size(batch_size): - if not (isinstance(batch_size, int) or (callable(batch_size))): - raise TypeError("batch_size should either be an int or a callable.") - if callable(batch_size): - sig = ins.signature(batch_size) - if len(sig.parameters) != 1: - raise ValueError("batch_size callable should take one parameter (BatchInfo).") - - -def check_count(count): - check_type(count, 'count', int) - if (count <= 0 and count != -1) or count > INT32_MAX: - raise ValueError("count should be either -1 or positive integer.") - - -def check_columns(columns, name): - if isinstance(columns, list): - for column in columns: - if not isinstance(column, str): - raise TypeError("Each column in {0} should be of type str. Got {1}.".format(name, type(column))) - elif not isinstance(columns, str): - raise TypeError("{} should be either a list of strings or a single string.".format(name)) - - def check_pad_info(key, val): """check the key and value pair of pad_info in batch""" - check_type(key, "key in pad_info", str) + type_check(key, (str,), "key in pad_info") + if val is not None: assert len(val) == 2, "value of pad_info should be a tuple of size 2" - check_type(val, "value in pad_info", tuple) + type_check(val, (tuple,), "value in pad_info") + if val[0] is not None: - check_type(val[0], "pad_shape", list) + type_check(val[0], (list,), "pad_shape") + for dim in val[0]: if dim is not None: - check_type(dim, "dim in pad_shape", int) + type_check(dim, (int,), "dim in pad_shape") assert dim > 0, "pad shape should be positive integers" if val[1] is not None: - check_type(val[1], "pad_value", (int, float, str, bytes)) + type_check(val[1], (int, float, str, bytes), "pad_value") def check_bucket_batch_by_length(method): 
"""check the input arguments of bucket_batch_by_length.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [column_names, bucket_boundaries, bucket_batch_sizes, element_length_function, pad_info, + pad_to_bucket_boundary, drop_remainder], _ = parse_user_args(method, *args, **kwargs) nreq_param_list = ['column_names', 'bucket_boundaries', 'bucket_batch_sizes'] - check_param_type(nreq_param_list, param_dict, list) + + type_check_list([column_names, bucket_boundaries, bucket_batch_sizes], (list,), nreq_param_list) nbool_param_list = ['pad_to_bucket_boundary', 'drop_remainder'] - check_param_type(nbool_param_list, param_dict, bool) + type_check_list([pad_to_bucket_boundary, drop_remainder], (bool,), nbool_param_list) # check column_names: must be list of string. - column_names = param_dict.get("column_names") + check_columns(column_names, "column_names") - if not column_names: - raise ValueError("column_names cannot be empty") - - all_string = all(isinstance(item, str) for item in column_names) - if not all_string: - raise TypeError("column_names should be a list of str.") - - element_length_function = param_dict.get("element_length_function") if element_length_function is None and len(column_names) != 1: raise ValueError("If element_length_function is not specified, exactly one column name should be passed.") # check bucket_boundaries: must be list of int, positive and strictly increasing - bucket_boundaries = param_dict.get('bucket_boundaries') - if not bucket_boundaries: raise ValueError("bucket_boundaries cannot be empty.") @@ -633,16 +414,15 @@ def check_bucket_batch_by_length(method): if not all_int: raise TypeError("bucket_boundaries should be a list of int.") - all_non_negative = all(item >= 0 for item in bucket_boundaries) + all_non_negative = all(item > 0 for item in bucket_boundaries) if not all_non_negative: - raise ValueError("bucket_boundaries cannot contain 
any negative numbers.") + raise ValueError("bucket_boundaries must only contain positive numbers.") for i in range(len(bucket_boundaries) - 1): if not bucket_boundaries[i + 1] > bucket_boundaries[i]: raise ValueError("bucket_boundaries should be strictly increasing.") # check bucket_batch_sizes: must be list of int and positive - bucket_batch_sizes = param_dict.get('bucket_batch_sizes') if len(bucket_batch_sizes) != len(bucket_boundaries) + 1: raise ValueError("bucket_batch_sizes must contain one element more than bucket_boundaries.") @@ -654,12 +434,13 @@ def check_bucket_batch_by_length(method): if not all_non_negative: raise ValueError("bucket_batch_sizes should be a list of positive numbers.") - if param_dict.get('pad_info') is not None: - check_type(param_dict["pad_info"], "pad_info", dict) - for k, v in param_dict.get('pad_info').items(): + if pad_info is not None: + type_check(pad_info, (dict,), "pad_info") + + for k, v in pad_info.items(): check_pad_info(k, v) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -668,37 +449,33 @@ def check_batch(method): """check the input arguments of batch.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [batch_size, drop_remainder, num_parallel_workers, per_batch_map, + input_columns, pad_info], param_dict = parse_user_args(method, *args, **kwargs) - nreq_param_int = ['num_parallel_workers'] - nreq_param_bool = ['drop_remainder'] - nreq_param_columns = ['input_columns'] + if not (isinstance(batch_size, int) or (callable(batch_size))): + raise TypeError("batch_size should either be an int or a callable.") - # check batch_size; required argument - batch_size = param_dict.get("batch_size") - if batch_size is None: - raise ValueError("batch_size is not provided.") - check_batch_size(batch_size) + if callable(batch_size): + sig = ins.signature(batch_size) + if len(sig.parameters) != 1: 
+ raise ValueError("batch_size callable should take one parameter (BatchInfo).") - check_param_type(nreq_param_int, param_dict, int) + if num_parallel_workers is not None: + check_num_parallel_workers(num_parallel_workers) + type_check(drop_remainder, (bool,), "drop_remainder") - check_param_type(nreq_param_bool, param_dict, bool) - - if (param_dict.get('pad_info') is not None) and (param_dict.get('per_batch_map') is not None): + if (pad_info is not None) and (per_batch_map is not None): raise ValueError("pad_info and per_batch_map can't both be set") - if param_dict.get('pad_info') is not None: - check_type(param_dict["pad_info"], "pad_info", dict) + if pad_info is not None: + type_check(param_dict["pad_info"], (dict,), "pad_info") for k, v in param_dict.get('pad_info').items(): check_pad_info(k, v) - for param_name in nreq_param_columns: - param = param_dict.get(param_name) - if param is not None: - check_columns(param, param_name) + if input_columns is not None: + check_columns(input_columns, "input_columns") - per_batch_map, input_columns = param_dict.get('per_batch_map'), param_dict.get('input_columns') if (per_batch_map is None) != (input_columns is None): # These two parameters appear together. 
raise ValueError("per_batch_map and input_columns need to be passed in together.") @@ -709,43 +486,38 @@ def check_batch(method): if len(input_columns) != (len(ins.signature(per_batch_map).parameters) - 1): raise ValueError("the signature of per_batch_map should match with input columns") - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method + def check_sync_wait(method): """check the input arguments of sync_wait.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [condition_name, num_batch, _], _ = parse_user_args(method, *args, **kwargs) - nreq_param_str = ['condition_name'] - nreq_param_int = ['step_size'] + type_check(condition_name, (str,), "condition_name") + type_check(num_batch, (int,), "num_batch") - check_param_type(nreq_param_int, param_dict, int) - - check_param_type(nreq_param_str, param_dict, str) - - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method + def check_shuffle(method): """check the input arguments of shuffle.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [buffer_size], _ = parse_user_args(method, *args, **kwargs) - # check buffer_size; required argument - buffer_size = param_dict.get("buffer_size") - if buffer_size is None: - raise ValueError("buffer_size is not provided.") - check_type(buffer_size, 'buffer_size', int) - check_interval_closed(buffer_size, 'buffer_size', [2, INT32_MAX]) + type_check(buffer_size, (int,), "buffer_size") - return method(*args, **kwargs) + check_value(buffer_size, [2, INT32_MAX], "buffer_size") + + return method(self, *args, **kwargs) return new_method @@ -754,23 +526,25 @@ def check_map(method): """check the input arguments of map.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def 
new_method(self, *args, **kwargs): + [input_columns, _, output_columns, columns_order, num_parallel_workers, python_multiprocessing, cache], _ = \ + parse_user_args(method, *args, **kwargs) - nreq_param_list = ['columns_order'] - nreq_param_int = ['num_parallel_workers'] nreq_param_columns = ['input_columns', 'output_columns'] - nreq_param_bool = ['python_multiprocessing'] - check_param_type(nreq_param_list, param_dict, list) - check_param_type(nreq_param_int, param_dict, int) - check_param_type(nreq_param_bool, param_dict, bool) - for param_name in nreq_param_columns: - param = param_dict.get(param_name) + if columns_order is not None: + type_check(columns_order, (list,), "columns_order") + if num_parallel_workers is not None: + check_num_parallel_workers(num_parallel_workers) + type_check(python_multiprocessing, (bool,), "python_multiprocessing") + if cache is not None: + type_check(cache, (cache_client.DatasetCache,), "cache") + + for param_name, param in zip(nreq_param_columns, [input_columns, output_columns]): if param is not None: check_columns(param, param_name) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -779,19 +553,20 @@ def check_filter(method): """"check the input arguments of filter.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) - predicate = param_dict.get("predicate") + def new_method(self, *args, **kwargs): + [predicate, input_columns, num_parallel_workers], _ = parse_user_args(method, *args, **kwargs) if not callable(predicate): raise TypeError("Predicate should be a python function or a callable python object.") - nreq_param_int = ['num_parallel_workers'] - check_param_type(nreq_param_int, param_dict, int) - param_name = "input_columns" - param = param_dict.get(param_name) - if param is not None: - check_columns(param, param_name) - return method(*args, **kwargs) + # num_parallel_workers is validated below only when it is not None + + if num_parallel_workers 
is not None: + check_num_parallel_workers(num_parallel_workers) + + if input_columns is not None: + check_columns(input_columns, "input_columns") + + return method(self, *args, **kwargs) return new_method @@ -800,14 +575,13 @@ def check_repeat(method): """check the input arguments of repeat.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [count], _ = parse_user_args(method, *args, **kwargs) - count = param_dict.get('count') - if count is not None: - check_count(count) - - return method(*args, **kwargs) + type_check(count, (int, type(None)), "repeat") + if isinstance(count, int): + check_value(count, (-1, INT32_MAX), "count") + return method(self, *args, **kwargs) return new_method @@ -816,15 +590,13 @@ def check_skip(method): """check the input arguments of skip.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [count], _ = parse_user_args(method, *args, **kwargs) - count = param_dict.get('count') - check_type(count, 'count', int) - if count < 0: - raise ValueError("Skip count must be positive integer or 0.") + type_check(count, (int,), "count") + check_value(count, (-1, INT32_MAX), "count") - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -833,13 +605,32 @@ def check_take(method): """check the input arguments of take.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [count], _ = parse_user_args(method, *args, **kwargs) + type_check(count, (int,), "count") + if (count <= 0 and count != -1) or count > INT32_MAX: + raise ValueError("count should be either -1 or positive integer.") - count = param_dict.get('count') - check_count(count) + return method(self, *args, **kwargs) - return method(*args, **kwargs) + return new_method + 
+ +def check_positive_int32(method): + """check whether the input argument is positive and int, only works for functions with one input.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + [count], param_dict = parse_user_args(method, *args, **kwargs) + para_name = None + for key in list(param_dict.keys()): + if key not in ['self', 'cls']: + para_name = key + # Need to get default value of param + if count is not None: + check_pos_int32(count, para_name) + + return method(self, *args, **kwargs) return new_method @@ -849,13 +640,8 @@ def check_zip(method): @wraps(method) def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) - - # check datasets; required argument - ds = param_dict.get("datasets") - if ds is None: - raise ValueError("datasets is not provided.") - check_type(ds, 'datasets', tuple) + [ds], _ = parse_user_args(method, *args, **kwargs) + type_check(ds, (tuple,), "datasets") return method(*args, **kwargs) @@ -866,18 +652,11 @@ def check_zip_dataset(method): """check the input arguments of zip method in `Dataset`.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [ds], _ = parse_user_args(method, *args, **kwargs) + type_check(ds, (tuple, datasets.Dataset), "datasets") - # check datasets; required argument - ds = param_dict.get("datasets") - if ds is None: - raise ValueError("datasets is not provided.") - - if not isinstance(ds, (tuple, datasets.Dataset)): - raise TypeError("datasets is not tuple or of type Dataset.") - - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -886,18 +665,13 @@ def check_concat(method): """check the input arguments of concat method in `Dataset`.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) - - # check datasets; required argument - ds = param_dict.get("datasets") - if ds is None: - raise 
ValueError("datasets is not provided.") - - if not isinstance(ds, (list, datasets.Dataset)): - raise TypeError("datasets is not list or of type Dataset.") - - return method(*args, **kwargs) + def new_method(self, *args, **kwargs): + [ds], _ = parse_user_args(method, *args, **kwargs) + type_check(ds, (list, datasets.Dataset), "datasets") + if isinstance(ds, list): + dataset_names = ["dataset[{0}]".format(i) for i in range(len(ds)) if isinstance(ds, list)] + type_check_list(ds, (datasets.Dataset,), dataset_names) + return method(self, *args, **kwargs) return new_method @@ -906,26 +680,23 @@ def check_rename(method): """check the input arguments of rename.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + values, _ = parse_user_args(method, *args, **kwargs) req_param_columns = ['input_columns', 'output_columns'] - # check req_param_list; required arguments - for param_name in req_param_columns: - param = param_dict.get(param_name) - if param is None: - raise ValueError("{} is not provided.".format(param_name)) + for param_name, param in zip(req_param_columns, values): check_columns(param, param_name) input_size, output_size = 1, 1 - if isinstance(param_dict.get(req_param_columns[0]), list): - input_size = len(param_dict.get(req_param_columns[0])) - if isinstance(param_dict.get(req_param_columns[1]), list): - output_size = len(param_dict.get(req_param_columns[1])) + input_columns, output_columns = values + if isinstance(input_columns, list): + input_size = len(input_columns) + if isinstance(output_columns, list): + output_size = len(output_columns) if input_size != output_size: raise ValueError("Number of column in input_columns and output_columns is not equal.") - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -934,75 +705,54 @@ def check_project(method): """check the input arguments of project.""" @wraps(method) - def 
new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) - - # check columns; required argument - columns = param_dict.get("columns") - if columns is None: - raise ValueError("columns is not provided.") + def new_method(self, *args, **kwargs): + [columns], _ = parse_user_args(method, *args, **kwargs) check_columns(columns, 'columns') - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method -def check_shape(shape, name): - if isinstance(shape, list): - for element in shape: - if not isinstance(element, int): - raise TypeError( - "Each element in {0} should be of type int. Got {1}.".format(name, type(element))) - else: - raise TypeError("Expected int list.") - - def check_add_column(method): """check the input arguments of add_column.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [name, de_type, shape], _ = parse_user_args(method, *args, **kwargs) - # check name; required argument - name = param_dict.get("name") - if not isinstance(name, str) or not name: + type_check(name, (str,), "name") + + if not name: raise TypeError("Expected non-empty string.") - # check type; required argument - de_type = param_dict.get("de_type") if de_type is not None: if not isinstance(de_type, typing.Type) and not check_valid_detype(de_type): raise TypeError("Unknown column type.") else: raise TypeError("Expected non-empty string.") - # check shape - shape = param_dict.get("shape") if shape is not None: - check_shape(shape, "shape") + type_check(shape, (list,), "shape") + shape_names = ["shape[{0}]".format(i) for i in range(len(shape))] + type_check_list(shape, (int,), shape_names) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_cluedataset(method): - """A wrapper that wrap a parameter checker to the original Dataset(CLUEDataset).""" + """A wrapper that wraps a parameter checker 
to the original Dataset(CLUEDataset).""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] - # check dataset_files; required argument dataset_files = param_dict.get('dataset_files') - if dataset_files is None: - raise ValueError("dataset_files is not provided.") - if not isinstance(dataset_files, (str, list)): - raise TypeError("dataset_files should be of type str or a list of strings.") + type_check(dataset_files, (str, list), "dataset files") # check task task_param = param_dict.get('task') @@ -1014,36 +764,29 @@ def check_cluedataset(method): if usage_param not in ['train', 'test', 'eval']: raise ValueError("usage should be train, test or eval") - check_param_type(nreq_param_int, param_dict, int) - + validate_dataset_param_value(nreq_param_int, param_dict, int) check_sampler_shuffle_shard_options(param_dict) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_textfiledataset(method): - """A wrapper that wrap a parameter checker to the original Dataset(TextFileDataset).""" + """A wrapper that wraps a parameter checker to the original Dataset(TextFileDataset).""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] - # check dataset_files; required argument dataset_files = param_dict.get('dataset_files') - if dataset_files is None: - raise ValueError("dataset_files is not provided.") - if not isinstance(dataset_files, (str, list)): - raise TypeError("dataset_files should be of type str or a list of strings.") - - check_param_type(nreq_param_int, param_dict, 
int) - + type_check(dataset_files, (str, list), "dataset files") + validate_dataset_param_value(nreq_param_int, param_dict, int) check_sampler_shuffle_shard_options(param_dict) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -1052,19 +795,16 @@ def check_split(method): """check the input arguments of split.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [sizes, randomize], _ = parse_user_args(method, *args, **kwargs) - nreq_param_list = ['sizes'] - nreq_param_bool = ['randomize'] - check_param_type(nreq_param_list, param_dict, list) - check_param_type(nreq_param_bool, param_dict, bool) + type_check(sizes, (list,), "sizes") + type_check(randomize, (bool,), "randomize") # check sizes: must be list of float or list of int - sizes = param_dict.get('sizes') - if not sizes: raise ValueError("sizes cannot be empty.") + all_int = all(isinstance(item, int) for item in sizes) all_float = all(isinstance(item, float) for item in sizes) @@ -1085,7 +825,7 @@ def check_split(method): if not abs(sum(sizes) - 1) < epsilon: raise ValueError("sizes is a list of float, but the percentages do not sum up to 1.") - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -1094,123 +834,85 @@ def check_gnn_graphdata(method): """check the input arguments of graphdata.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [dataset_file, num_parallel_workers], _ = parse_user_args(method, *args, **kwargs) + check_file(dataset_file) - # check dataset_file; required argument - dataset_file = param_dict.get('dataset_file') - if dataset_file is None: - raise ValueError("dataset_file is not provided.") - check_dataset_file(dataset_file) - - nreq_param_int = ['num_parallel_workers'] - - check_param_type(nreq_param_int, 
param_dict, int) - - return method(*args, **kwargs) + if num_parallel_workers is not None: + check_num_parallel_workers(num_parallel_workers) + return method(self, *args, **kwargs) return new_method -def check_gnn_list_or_ndarray(param, param_name): - """Check if the input parameter is list or numpy.ndarray.""" - - if isinstance(param, list): - for m in param: - if not isinstance(m, int): - raise TypeError( - "Each member in {0} should be of type int. Got {1}.".format(param_name, type(m))) - elif isinstance(param, np.ndarray): - if not param.dtype == np.int32: - raise TypeError("Each member in {0} should be of type int32. Got {1}.".format( - param_name, param.dtype)) - else: - raise TypeError("Wrong input type for {0}, should be list or numpy.ndarray, got {1}".format( - param_name, type(param))) - - def check_gnn_get_all_nodes(method): - """A wrapper that wrap a parameter checker to the GNN `get_all_nodes` function.""" + """A wrapper that wraps a parameter checker to the GNN `get_all_nodes` function.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [node_type], _ = parse_user_args(method, *args, **kwargs) + type_check(node_type, (int,), "node_type") - # check node_type; required argument - check_type(param_dict.get("node_type"), 'node_type', int) - - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_gnn_get_all_edges(method): - """A wrapper that wrap a parameter checker to the GNN `get_all_edges` function.""" + """A wrapper that wraps a parameter checker to the GNN `get_all_edges` function.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [edge_type], _ = parse_user_args(method, *args, **kwargs) + type_check(edge_type, (int,), "edge_type") - # check node_type; required argument - check_type(param_dict.get("edge_type"), 
'edge_type', int) - - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_gnn_get_nodes_from_edges(method): - """A wrapper that wrap a parameter checker to the GNN `get_nodes_from_edges` function.""" + """A wrapper that wraps a parameter checker to the GNN `get_nodes_from_edges` function.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [edge_list], _ = parse_user_args(method, *args, **kwargs) + check_gnn_list_or_ndarray(edge_list, "edge_list") - # check edge_list; required argument - check_gnn_list_or_ndarray(param_dict.get("edge_list"), 'edge_list') - - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_gnn_get_all_neighbors(method): - """A wrapper that wrap a parameter checker to the GNN `get_all_neighbors` function.""" + """A wrapper that wraps a parameter checker to the GNN `get_all_neighbors` function.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [node_list, neighbour_type], _ = parse_user_args(method, *args, **kwargs) - # check node_list; required argument - check_gnn_list_or_ndarray(param_dict.get("node_list"), 'node_list') + check_gnn_list_or_ndarray(node_list, 'node_list') + type_check(neighbour_type, (int,), "neighbour_type") - # check neighbor_type; required argument - check_type(param_dict.get("neighbor_type"), 'neighbor_type', int) - - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_gnn_get_sampled_neighbors(method): - """A wrapper that wrap a parameter checker to the GNN `get_sampled_neighbors` function.""" + """A wrapper that wraps a parameter checker to the GNN `get_sampled_neighbors` function.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def 
new_method(self, *args, **kwargs): + [node_list, neighbor_nums, neighbor_types], _ = parse_user_args(method, *args, **kwargs) - # check node_list; required argument - check_gnn_list_or_ndarray(param_dict.get("node_list"), 'node_list') + check_gnn_list_or_ndarray(node_list, 'node_list') - # check neighbor_nums; required argument - neighbor_nums = param_dict.get("neighbor_nums") check_gnn_list_or_ndarray(neighbor_nums, 'neighbor_nums') if not neighbor_nums or len(neighbor_nums) > 6: raise ValueError("Wrong number of input members for {0}, should be between 1 and 6, got {1}".format( 'neighbor_nums', len(neighbor_nums))) - # check neighbor_types; required argument - neighbor_types = param_dict.get("neighbor_types") check_gnn_list_or_ndarray(neighbor_types, 'neighbor_types') if not neighbor_types or len(neighbor_types) > 6: raise ValueError("Wrong number of input members for {0}, should be between 1 and 6, got {1}".format( @@ -1220,47 +922,41 @@ def check_gnn_get_sampled_neighbors(method): raise ValueError( "The number of members of neighbor_nums and neighbor_types is inconsistent") - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_gnn_get_neg_sampled_neighbors(method): - """A wrapper that wrap a parameter checker to the GNN `get_neg_sampled_neighbors` function.""" + """A wrapper that wraps a parameter checker to the GNN `get_neg_sampled_neighbors` function.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [node_list, neg_neighbor_num, neg_neighbor_type], _ = parse_user_args(method, *args, **kwargs) - # check node_list; required argument - check_gnn_list_or_ndarray(param_dict.get("node_list"), 'node_list') + check_gnn_list_or_ndarray(node_list, 'node_list') + type_check(neg_neighbor_num, (int,), "neg_neighbor_num") + type_check(neg_neighbor_type, (int,), "neg_neighbor_type") - # check neg_neighbor_num; required argument - 
check_type(param_dict.get("neg_neighbor_num"), 'neg_neighbor_num', int) - - # check neg_neighbor_type; required argument - check_type(param_dict.get("neg_neighbor_type"), - 'neg_neighbor_type', int) - - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method def check_gnn_random_walk(method): - """A wrapper that wrap a parameter checker to the GNN `random_walk` function.""" + """A wrapper that wraps a parameter checker to the GNN `random_walk` function.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [target_nodes, meta_path, step_home_param, step_away_param, default_node], _ = parse_user_args(method, *args, + **kwargs) + check_gnn_list_or_ndarray(target_nodes, 'target_nodes') + check_gnn_list_or_ndarray(meta_path, 'meta_path') + type_check(step_home_param, (float,), "step_home_param") + type_check(step_away_param, (float,), "step_away_param") + type_check(default_node, (int,), "default_node") - # check node_list; required argument - check_gnn_list_or_ndarray(param_dict.get("target_nodes"), 'target_nodes') - - # check meta_path; required argument - check_gnn_list_or_ndarray(param_dict.get("meta_path"), 'meta_path') - - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -1268,8 +964,7 @@ def check_gnn_random_walk(method): def check_aligned_list(param, param_name, member_type): """Check whether the structure of each member of the list is the same.""" - if not isinstance(param, list): - raise TypeError("Parameter {0} is not a list".format(param_name)) + type_check(param, (list,), "param") if not param: raise TypeError( "Parameter {0} or its members are empty".format(param_name)) @@ -1278,6 +973,7 @@ def check_aligned_list(param, param_name, member_type): for member in param: if isinstance(member, list): check_aligned_list(member, param_name, member_type) + if member_have_list not in (None, True): 
raise TypeError("The type of each member of the parameter {0} is inconsistent".format( param_name)) @@ -1287,9 +983,7 @@ def check_aligned_list(param, param_name, member_type): member_have_list = True list_len = len(member) else: - if not isinstance(member, member_type): - raise TypeError("Each member in {0} should be of type int. Got {1}.".format( - param_name, type(member))) + type_check(member, (member_type,), param_name) if member_have_list not in (None, False): raise TypeError("The type of each member of the parameter {0} is inconsistent".format( param_name)) @@ -1297,53 +991,65 @@ def check_aligned_list(param, param_name, member_type): def check_gnn_get_node_feature(method): - """A wrapper that wrap a parameter checker to the GNN `get_node_feature` function.""" + """A wrapper that wraps a parameter checker to the GNN `get_node_feature` function.""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + [node_list, feature_types], _ = parse_user_args(method, *args, **kwargs) - # check node_list; required argument - node_list = param_dict.get("node_list") + type_check(node_list, (list, np.ndarray), "node_list") if isinstance(node_list, list): check_aligned_list(node_list, 'node_list', int) elif isinstance(node_list, np.ndarray): if not node_list.dtype == np.int32: raise TypeError("Each member in {0} should be of type int32. 
Got {1}.".format( node_list, node_list.dtype)) - else: - raise TypeError("Wrong input type for {0}, should be list or numpy.ndarray, got {1}".format( - 'node_list', type(node_list))) - # check feature_types; required argument - check_gnn_list_or_ndarray(param_dict.get( - "feature_types"), 'feature_types') + check_gnn_list_or_ndarray(feature_types, 'feature_types') - return method(*args, **kwargs) + return method(self, *args, **kwargs) + + return new_method + + +def check_gnn_get_edge_feature(method): + """A wrapper that wraps a parameter checker to the GNN `get_edge_feature` function.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + [edge_list, feature_types], _ = parse_user_args(method, *args, **kwargs) + + type_check(edge_list, (list, np.ndarray), "edge_list") + if isinstance(edge_list, list): + check_aligned_list(edge_list, 'edge_list', int) + elif isinstance(edge_list, np.ndarray): + if not edge_list.dtype == np.int32: + raise TypeError("Each member in {0} should be of type int32. 
Got {1}.".format( + edge_list, edge_list.dtype)) + + check_gnn_list_or_ndarray(feature_types, 'feature_types') + + return method(self, *args, **kwargs) return new_method def check_numpyslicesdataset(method): - """A wrapper that wrap a parameter checker to the original Dataset(NumpySlicesDataset).""" + """A wrapper that wraps a parameter checker to the original Dataset(NumpySlicesDataset).""" @wraps(method) - def new_method(*args, **kwargs): - param_dict = make_param_dict(method, args, kwargs) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) - # check data; required argument - data = param_dict.get('data') - if not isinstance(data, (list, tuple, dict, np.ndarray)): - raise TypeError("Unsupported data type: {}, only support some common python data type, " - "like list, tuple, dict, and numpy array.".format(type(data))) - if isinstance(data, tuple) and not isinstance(data[0], (list, np.ndarray)): - raise TypeError("Unsupported data type: when input is tuple, only support some common python " - "data type, like tuple of lists and tuple of numpy arrays.") + data = param_dict.get("data") + column_names = param_dict.get("column_names") if not data: - raise ValueError("Input data is empty.") + raise ValueError("Argument data cannot be empty") + type_check(data, (list, tuple, dict, np.ndarray), "data") + if isinstance(data, tuple): + type_check(data[0], (list, np.ndarray), "data[0]") # check column_names - column_names = param_dict.get('column_names') if column_names is not None: check_columns(column_names, "column_names") @@ -1364,6 +1070,6 @@ def check_numpyslicesdataset(method): raise ValueError("Num of input column names is {0}, but required is {1} as data is list." 
.format(column_num, 1)) - return method(*args, **kwargs) + return method(self, *args, **kwargs) return new_method diff --git a/mindspore/dataset/text/transforms.py b/mindspore/dataset/text/transforms.py index 8b0d47df253..30fa2b8f429 100644 --- a/mindspore/dataset/text/transforms.py +++ b/mindspore/dataset/text/transforms.py @@ -52,8 +52,9 @@ import mindspore._c_dataengine as cde from .utils import JiebaMode, NormalizeForm, to_str from .validators import check_lookup, check_jieba_add_dict, \ - check_jieba_add_word, check_jieba_init, check_ngram, check_pair_truncate, \ - check_to_number, check_python_tokenizer + check_jieba_add_word, check_jieba_init, check_with_offsets, check_unicode_script_tokenizer,\ + check_wordpiece_tokenizer, check_regex_tokenizer, check_basic_tokenizer, check_ngram, check_pair_truncate,\ + check_to_number, check_bert_tokenizer, check_python_tokenizer from ..core.datatypes import mstype_to_detype @@ -63,17 +64,13 @@ class Lookup(cde.LookupOp): Args: vocab(Vocab): a Vocab object. - unknown(int, optional): default id to lookup a word that is out of vocab. If no argument is passed, 1 will be - used to be the default id which is the convention for unknown_token . Otherwise, user is strongly - encouraged to pass in the id for (default=None). + unknown_token(str, optional): word to use for lookup if the word being looked up is out of Vocabulary (oov). + If unknown_token is oov, runtime error will be thrown (default=None). 
""" @check_lookup - def __init__(self, vocab, unknown=None): - if unknown is None: - super().__init__(vocab) - else: - super().__init__(vocab, unknown) + def __init__(self, vocab, unknown_token=None): + super().__init__(vocab, unknown_token) class Ngram(cde.NgramOp): @@ -98,7 +95,7 @@ class Ngram(cde.NgramOp): """ @check_ngram - def __init__(self, n, left_pad=None, right_pad=None, separator=None): + def __init__(self, n, left_pad=("", 0), right_pad=("", 0), separator=" "): super().__init__(ngrams=n, l_pad_len=left_pad[1], r_pad_len=right_pad[1], l_pad_token=left_pad[0], r_pad_token=right_pad[0], separator=separator) @@ -125,15 +122,31 @@ class JiebaTokenizer(cde.JiebaTokenizerOp): - JiebaMode.MP, tokenize with MPSegment algorithm. - JiebaMode.HMM, tokenize with Hiddel Markov Model Segment algorithm. - JiebaMode.MIX, tokenize with a mix of MPSegment and HMMSegment algorithm. + with_offsets (bool, optional): If or not output offsets of tokens (default=False). + + Examples: + >>> # If with_offsets=False, default output one column {["text", dtype=str]} + >>> tokenizer_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=False) + >>> data = data.map(operations=tokenizer_op) + >>> # If with_offsets=False, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32], + >>> # ["offsets_limit", dtype=uint32]} + >>> tokenizer_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) + >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + >>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) """ @check_jieba_init - def __init__(self, hmm_path, mp_path, mode=JiebaMode.MIX): + def __init__(self, hmm_path, mp_path, mode=JiebaMode.MIX, with_offsets=False): + if not isinstance(mode, JiebaMode): + raise TypeError("Wrong input type for mode, should be JiebaMode.") + self.mode = mode self.__check_path__(hmm_path) self.__check_path__(mp_path) + 
self.with_offsets = with_offsets super().__init__(hmm_path, mp_path, - DE_C_INTER_JIEBA_MODE[mode]) + DE_C_INTER_JIEBA_MODE[mode], + self.with_offsets) @check_jieba_add_word def add_word(self, word, freq=None): @@ -226,8 +239,26 @@ class JiebaTokenizer(cde.JiebaTokenizerOp): class UnicodeCharTokenizer(cde.UnicodeCharTokenizerOp): """ Tokenize a scalar tensor of UTF-8 string to Unicode characters. + + Args: + with_offsets (bool, optional): If or not output offsets of tokens (default=False). + + Examples: + >>> # If with_offsets=False, default output one column {["text", dtype=str]} + >>> tokenizer_op = text.UnicodeCharTokenizer() + >>> dataset = dataset.map(operations=tokenizer_op) + >>> # If with_offsets=False, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32], + >>> # ["offsets_limit", dtype=uint32]} + >>> tokenizer_op = text.UnicodeCharTokenizer(True) + >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + >>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) """ + @check_with_offsets + def __init__(self, with_offsets=False): + self.with_offsets = with_offsets + super().__init__(self.with_offsets) + class WordpieceTokenizer(cde.WordpieceTokenizerOp): """ @@ -239,22 +270,58 @@ class WordpieceTokenizer(cde.WordpieceTokenizerOp): max_bytes_per_token (int, optional): Tokens exceeding this length will not be further split(default=100). unknown_token (str, optional): When we can not found the token: if 'unknown_token' is empty string, return the token directly, else return 'unknown_token'(default='[UNK]'). + with_offsets (bool, optional): If or not output offsets of tokens (default=False). 
+ + Examples: + >>> # If with_offsets=False, default output one column {["text", dtype=str]} + >>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token=['UNK'], + >>> max_bytes_per_token=100, with_offsets=False) + >>> dataset = dataset.map(operations=tokenizer_op) + >>> # If with_offsets=False, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32], + >>> # ["offsets_limit", dtype=uint32]} + >>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token=['UNK'], + >>> max_bytes_per_token=100, with_offsets=True) + >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + >>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) """ - def __init__(self, vocab, suffix_indicator='##', max_bytes_per_token=100, unknown_token='[UNK]'): + @check_wordpiece_tokenizer + def __init__(self, vocab, suffix_indicator='##', max_bytes_per_token=100, + unknown_token='[UNK]', with_offsets=False): self.vocab = vocab self.suffix_indicator = suffix_indicator self.max_bytes_per_token = max_bytes_per_token self.unknown_token = unknown_token - super().__init__(self.vocab, self.suffix_indicator, self.max_bytes_per_token, self.unknown_token) + self.with_offsets = with_offsets + super().__init__(self.vocab, self.suffix_indicator, self.max_bytes_per_token, + self.unknown_token, self.with_offsets) if platform.system().lower() != 'windows': class WhitespaceTokenizer(cde.WhitespaceTokenizerOp): """ Tokenize a scalar tensor of UTF-8 string on ICU defined whitespaces(such as: ' ', '\\\\t', '\\\\r', '\\\\n'). + + Args: + with_offsets (bool, optional): If or not output offsets of tokens (default=False). 
+ + Examples: + >>> # If with_offsets=False, default output one column {["text", dtype=str]} + >>> tokenizer_op = text.WhitespaceTokenizer() + >>> dataset = dataset.map(operations=tokenizer_op) + >>> # If with_offsets=False, then output three columns {["token", dtype=str], + >>> # ["offsets_start", dtype=uint32], + >>> # ["offsets_limit", dtype=uint32]} + >>> tokenizer_op = text.WhitespaceTokenizer(True) + >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + >>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) """ + @check_with_offsets + def __init__(self, with_offsets=False): + self.with_offsets = with_offsets + super().__init__(self.with_offsets) + class UnicodeScriptTokenizer(cde.UnicodeScriptTokenizerOp): """ @@ -262,11 +329,25 @@ if platform.system().lower() != 'windows': Args: keep_whitespace (bool, optional): If or not emit whitespace tokens (default=False). + with_offsets (bool, optional): If or not output offsets of tokens (default=False). 
+ + Examples: + >>> # If with_offsets=False, default output one column {["text", dtype=str]} + >>> tokenizer_op = text.UnicodeScriptTokenizerOp(keep_whitespace=True, with_offsets=False) + >>> dataset = dataset.map(operations=tokenizer_op) + >>> # If with_offsets=False, then output three columns {["token", dtype=str], + >>> # ["offsets_start", dtype=uint32], + >>> # ["offsets_limit", dtype=uint32]} + >>> tokenizer_op = text.UnicodeScriptTokenizerOp(keep_whitespace=True, with_offsets=True) + >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + >>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) """ - def __init__(self, keep_whitespace=False): + @check_unicode_script_tokenizer + def __init__(self, keep_whitespace=False, with_offsets=False): self.keep_whitespace = keep_whitespace - super().__init__(self.keep_whitespace) + self.with_offsets = with_offsets + super().__init__(self.keep_whitespace, self.with_offsets) class CaseFold(cde.CaseFoldOp): @@ -302,6 +383,9 @@ if platform.system().lower() != 'windows': """ def __init__(self, normalize_form=NormalizeForm.NFKC): + if not isinstance(normalize_form, NormalizeForm): + raise TypeError("Wrong input type for normalization_form, should be NormalizeForm.") + self.normalize_form = DE_C_INTER_NORMALIZE_FORM[normalize_form] super().__init__(self.normalize_form) @@ -338,12 +422,26 @@ if platform.system().lower() != 'windows': keep_delim_pattern(str, optional): The string matched by 'delim_pattern' can be kept as a token if it can be matched by 'keep_delim_pattern'. And the default value is empty str(''), in this situation, delimiters will not kept as a output token(default=''). + with_offsets (bool, optional): If or not output offsets of tokens (default=False). 
+ + Examples: + >>> # If with_offsets=False, default output one column {["text", dtype=str]} + >>> tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern, with_offsets=False) + >>> dataset = dataset.map(operations=tokenizer_op) + >>> # If with_offsets=False, then output three columns {["token", dtype=str], + >>> # ["offsets_start", dtype=uint32], + >>> # ["offsets_limit", dtype=uint32]} + >>> tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern, with_offsets=True) + >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + >>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) """ - def __init__(self, delim_pattern, keep_delim_pattern=''): + @check_regex_tokenizer + def __init__(self, delim_pattern, keep_delim_pattern='', with_offsets=False): self.delim_pattern = delim_pattern self.keep_delim_pattern = keep_delim_pattern - super().__init__(self.delim_pattern, self.keep_delim_pattern) + self.with_offsets = with_offsets + super().__init__(self.delim_pattern, self.keep_delim_pattern, self.with_offsets) class BasicTokenizer(cde.BasicTokenizerOp): @@ -359,16 +457,41 @@ if platform.system().lower() != 'windows': only effective when 'lower_case' is False. See NormalizeUTF8 for details(default='NONE'). preserve_unused_token(bool, optional): If True, do not split special tokens like '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]'(default=True). + with_offsets (bool, optional): If or not output offsets of tokens (default=False). 
+ + Examples: + >>> # If with_offsets=False, default output one column {["text", dtype=str]} + >>> tokenizer_op = text.BasicTokenizer(lower_case=False, + >>> keep_whitespace=False, + >>> normalization_form=NormalizeForm.NONE, + >>> preserve_unused_token=True, + >>> with_offsets=False) + >>> dataset = dataset.map(operations=tokenizer_op) + >>> # If with_offsets=False, then output three columns {["token", dtype=str], + >>> # ["offsets_start", dtype=uint32], + >>> # ["offsets_limit", dtype=uint32]} + >>> tokenizer_op = text.BasicTokenizer(lower_case=False, + >>> keep_whitespace=False, + >>> normalization_form=NormalizeForm.NONE, + >>> preserve_unused_token=True, + >>> with_offsets=True) + >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + >>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) """ - def __init__(self, lower_case=False, keep_whitespace=False, - normalization_form=NormalizeForm.NONE, preserve_unused_token=True): + @check_basic_tokenizer + def __init__(self, lower_case=False, keep_whitespace=False, normalization_form=NormalizeForm.NONE, + preserve_unused_token=True, with_offsets=False): + if not isinstance(normalization_form, NormalizeForm): + raise TypeError("Wrong input type for normalization_form, should be NormalizeForm.") + self.lower_case = lower_case self.keep_whitespace = keep_whitespace self.normalization_form = DE_C_INTER_NORMALIZE_FORM[normalization_form] self.preserve_unused_token = preserve_unused_token - super().__init__(self.lower_case, self.keep_whitespace, - self.normalization_form, self.preserve_unused_token) + self.with_offsets = with_offsets + super().__init__(self.lower_case, self.keep_whitespace, self.normalization_form, + self.preserve_unused_token, self.with_offsets) class BertTokenizer(cde.BertTokenizerOp): @@ -389,11 +512,33 @@ if platform.system().lower() != 'windows': only effective when 'lower_case' is False. 
See NormalizeUTF8 for details(default='NONE'). preserve_unused_token(bool, optional): If True, do not split special tokens like '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]'(default=True). + with_offsets (bool, optional): If or not output offsets of tokens (default=False). + + Examples: + >>> # If with_offsets=False, default output one column {["text", dtype=str]} + >>> tokenizer_op = text.BertTokenizer(vocab=vocab, suffix_indicator='##', max_bytes_per_token=100, + >>> unknown_token=100, lower_case=False, keep_whitespace=False, + >>> normalization_form=NormalizeForm.NONE, preserve_unused_token=True, + >>> with_offsets=False) + >>> dataset = dataset.map(operations=tokenizer_op) + >>> # If with_offsets=False, then output three columns {["token", dtype=str], + >>> # ["offsets_start", dtype=uint32], + >>> # ["offsets_limit", dtype=uint32]} + >>> tokenizer_op = text.BertTokenizer(vocab=vocab, suffix_indicator='##', max_bytes_per_token=100, + >>> unknown_token=100, lower_case=False, keep_whitespace=False, + >>> normalization_form=NormalizeForm.NONE, preserve_unused_token=True, + >>> with_offsets=True) + >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + >>> columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) """ - def __init__(self, vocab, suffix_indicator='##', max_bytes_per_token=100, - unknown_token='[UNK]', lower_case=False, keep_whitespace=False, - normalization_form=NormalizeForm.NONE, preserve_unused_token=True): + @check_bert_tokenizer + def __init__(self, vocab, suffix_indicator='##', max_bytes_per_token=100, unknown_token='[UNK]', + lower_case=False, keep_whitespace=False, normalization_form=NormalizeForm.NONE, + preserve_unused_token=True, with_offsets=False): + if not isinstance(normalization_form, NormalizeForm): + raise TypeError("Wrong input type for normalization_form, should be NormalizeForm.") + self.vocab = vocab self.suffix_indicator = suffix_indicator 
self.max_bytes_per_token = max_bytes_per_token @@ -402,8 +547,10 @@ if platform.system().lower() != 'windows': self.keep_whitespace = keep_whitespace self.normalization_form = DE_C_INTER_NORMALIZE_FORM[normalization_form] self.preserve_unused_token = preserve_unused_token + self.with_offsets = with_offsets super().__init__(self.vocab, self.suffix_indicator, self.max_bytes_per_token, self.unknown_token, - self.lower_case, self.keep_whitespace, self.normalization_form, self.preserve_unused_token) + self.lower_case, self.keep_whitespace, self.normalization_form, + self.preserve_unused_token, self.with_offsets) class TruncateSequencePair(cde.TruncateSequencePairOp): diff --git a/mindspore/dataset/text/utils.py b/mindspore/dataset/text/utils.py index 7347a4b8543..ef1d0e6fc5f 100644 --- a/mindspore/dataset/text/utils.py +++ b/mindspore/dataset/text/utils.py @@ -28,6 +28,7 @@ __all__ = [ "Vocab", "to_str", "to_bytes" ] + class Vocab(cde.Vocab): """ Vocab object that is used to lookup a word. @@ -38,7 +39,7 @@ class Vocab(cde.Vocab): @classmethod @check_from_dataset def from_dataset(cls, dataset, columns=None, freq_range=None, top_k=None, special_tokens=None, - special_first=None): + special_first=True): """ Build a vocab from a dataset. @@ -62,13 +63,21 @@ class Vocab(cde.Vocab): special_tokens(list, optional): a list of strings, each one is a special token. for example special_tokens=["",""] (default=None, no special tokens will be added). special_first(bool, optional): whether special_tokens will be prepended/appended to vocab. If special_tokens - is specified and special_first is set to None, special_tokens will be prepended (default=None). + is specified and special_first is set to True, special_tokens will be prepended (default=True). Returns: Vocab, Vocab object built from dataset. 
""" vocab = Vocab() + if columns is None: + columns = [] + if not isinstance(columns, list): + columns = [columns] + if freq_range is None: + freq_range = (None, None) + if special_tokens is None: + special_tokens = [] root = copy.deepcopy(dataset).build_vocab(vocab, columns, freq_range, top_k, special_tokens, special_first) for d in root.create_dict_iterator(): if d is not None: @@ -77,7 +86,7 @@ class Vocab(cde.Vocab): @classmethod @check_from_list - def from_list(cls, word_list, special_tokens=None, special_first=None): + def from_list(cls, word_list, special_tokens=None, special_first=True): """ Build a vocab object from a list of word. @@ -86,29 +95,33 @@ class Vocab(cde.Vocab): special_tokens(list, optional): a list of strings, each one is a special token. for example special_tokens=["",""] (default=None, no special tokens will be added). special_first(bool, optional): whether special_tokens will be prepended/appended to vocab, If special_tokens - is specified and special_first is set to None, special_tokens will be prepended (default=None). + is specified and special_first is set to True, special_tokens will be prepended (default=True). """ - + if special_tokens is None: + special_tokens = [] return super().from_list(word_list, special_tokens, special_first) @classmethod @check_from_file - def from_file(cls, file_path, delimiter=None, vocab_size=None, special_tokens=None, special_first=None): + def from_file(cls, file_path, delimiter="", vocab_size=None, special_tokens=None, special_first=True): """ Build a vocab object from a list of word. Args: file_path (str): path to the file which contains the vocab list. delimiter (str, optional): a delimiter to break up each line in file, the first element is taken to be - the word (default=None). + the word (default=""). vocab_size (int, optional): number of words to read from file_path (default=None, all words are taken). special_tokens (list, optional): a list of strings, each one is a special token. 
for example special_tokens=["",""] (default=None, no special tokens will be added). special_first (bool, optional): whether special_tokens will be prepended/appended to vocab, - If special_tokens is specified and special_first is set to None, - special_tokens will be prepended (default=None). + If special_tokens is specified and special_first is set to True, + special_tokens will be prepended (default=True). """ - + if vocab_size is None: + vocab_size = -1 + if special_tokens is None: + special_tokens = [] return super().from_file(file_path, delimiter, vocab_size, special_tokens, special_first) @classmethod diff --git a/mindspore/dataset/text/validators.py b/mindspore/dataset/text/validators.py index afab8665cde..b0327f5609c 100644 --- a/mindspore/dataset/text/validators.py +++ b/mindspore/dataset/text/validators.py @@ -17,23 +17,22 @@ validators for text ops """ from functools import wraps - -import mindspore._c_dataengine as cde import mindspore.common.dtype as mstype +import mindspore._c_dataengine as cde from mindspore._c_expression import typing -from ..transforms.validators import check_uint32, check_pos_int64 + +from ..core.validator_helpers import parse_user_args, type_check, type_check_list, check_uint32, \ + INT32_MAX, check_value, check_positive def check_unique_list_of_words(words, arg_name): """Check that words is a list and each element is a str without any duplication""" - if not isinstance(words, list): - raise ValueError(arg_name + " needs to be a list of words of type string.") + type_check(words, (list,), arg_name) words_set = set() for word in words: - if not isinstance(word, str): - raise ValueError("each word in " + arg_name + " needs to be type str.") + type_check(word, (str,), arg_name) if word in words_set: raise ValueError(arg_name + " contains duplicate word: " + word + ".") words_set.add(word) @@ -41,161 +40,100 @@ def check_unique_list_of_words(words, arg_name): def check_lookup(method): - """A wrapper that wrap a parameter checker to 
the original function.""" + """A wrapper that wraps a parameter checker to the original function.""" @wraps(method) def new_method(self, *args, **kwargs): - vocab, unknown = (list(args) + 2 * [None])[:2] - if "vocab" in kwargs: - vocab = kwargs.get("vocab") - if "unknown" in kwargs: - unknown = kwargs.get("unknown") - if unknown is not None: - if not (isinstance(unknown, int) and unknown >= 0): - raise ValueError("unknown needs to be a non-negative integer.") + [vocab, unknown_token], _ = parse_user_args(method, *args, **kwargs) - if not isinstance(vocab, cde.Vocab): - raise ValueError("vocab is not an instance of cde.Vocab.") + if unknown_token is not None: + type_check(unknown_token, (str,), "unknown_token") - kwargs["vocab"] = vocab - kwargs["unknown"] = unknown - return method(self, **kwargs) + type_check(vocab, (cde.Vocab,), "vocab is not an instance of cde.Vocab.") + + return method(self, *args, **kwargs) return new_method def check_from_file(method): - """A wrapper that wrap a parameter checker to the original function.""" + """A wrapper that wraps a parameter checker to the original function.""" @wraps(method) def new_method(self, *args, **kwargs): - file_path, delimiter, vocab_size, special_tokens, special_first = (list(args) + 5 * [None])[:5] - if "file_path" in kwargs: - file_path = kwargs.get("file_path") - if "delimiter" in kwargs: - delimiter = kwargs.get("delimiter") - if "vocab_size" in kwargs: - vocab_size = kwargs.get("vocab_size") - if "special_tokens" in kwargs: - special_tokens = kwargs.get("special_tokens") - if "special_first" in kwargs: - special_first = kwargs.get("special_first") - - if not isinstance(file_path, str): - raise ValueError("file_path needs to be str.") - - if delimiter is not None: - if not isinstance(delimiter, str): - raise ValueError("delimiter needs to be str.") - else: - delimiter = "" + [file_path, delimiter, vocab_size, special_tokens, special_first], _ = parse_user_args(method, *args, + **kwargs) + if special_tokens 
is not None: + check_unique_list_of_words(special_tokens, "special_tokens") + type_check_list([file_path, delimiter], (str,), ["file_path", "delimiter"]) if vocab_size is not None: - if not (isinstance(vocab_size, int) and vocab_size > 0): - raise ValueError("vocab size needs to be a positive integer.") - else: - vocab_size = -1 + check_value(vocab_size, (-1, INT32_MAX), "vocab_size") + type_check(special_first, (bool,), special_first) - if special_first is None: - special_first = True - - if not isinstance(special_first, bool): - raise ValueError("special_first needs to be a boolean value") - - if special_tokens is None: - special_tokens = [] - - check_unique_list_of_words(special_tokens, "special_tokens") - - kwargs["file_path"] = file_path - kwargs["delimiter"] = delimiter - kwargs["vocab_size"] = vocab_size - kwargs["special_tokens"] = special_tokens - kwargs["special_first"] = special_first - - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method def check_from_list(method): - """A wrapper that wrap a parameter checker to the original function.""" + """A wrapper that wraps a parameter checker to the original function.""" @wraps(method) def new_method(self, *args, **kwargs): - word_list, special_tokens, special_first = (list(args) + 3 * [None])[:3] - if "word_list" in kwargs: - word_list = kwargs.get("word_list") - if "special_tokens" in kwargs: - special_tokens = kwargs.get("special_tokens") - if "special_first" in kwargs: - special_first = kwargs.get("special_first") - if special_tokens is None: - special_tokens = [] + [word_list, special_tokens, special_first], _ = parse_user_args(method, *args, **kwargs) + word_set = check_unique_list_of_words(word_list, "word_list") - token_set = check_unique_list_of_words(special_tokens, "special_tokens") + if special_tokens is not None: + token_set = check_unique_list_of_words(special_tokens, "special_tokens") - intersect = word_set.intersection(token_set) + intersect = 
word_set.intersection(token_set) - if intersect != set(): - raise ValueError("special_tokens and word_list contain duplicate word :" + str(intersect) + ".") + if intersect != set(): + raise ValueError("special_tokens and word_list contain duplicate word :" + str(intersect) + ".") - if special_first is None: - special_first = True + type_check(special_first, (bool,), "special_first") - if not isinstance(special_first, bool): - raise ValueError("special_first needs to be a boolean value.") - - kwargs["word_list"] = word_list - kwargs["special_tokens"] = special_tokens - kwargs["special_first"] = special_first - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method def check_from_dict(method): - """A wrapper that wrap a parameter checker to the original function.""" + """A wrapper that wraps a parameter checker to the original function.""" @wraps(method) def new_method(self, *args, **kwargs): - word_dict, = (list(args) + [None])[:1] - if "word_dict" in kwargs: - word_dict = kwargs.get("word_dict") - if not isinstance(word_dict, dict): - raise ValueError("word_dict needs to be a list of word,id pairs.") + [word_dict], _ = parse_user_args(method, *args, **kwargs) + + type_check(word_dict, (dict,), "word_dict") + for word, word_id in word_dict.items(): - if not isinstance(word, str): - raise ValueError("Each word in word_dict needs to be type string.") - if not (isinstance(word_id, int) and word_id >= 0): - raise ValueError("Each word id needs to be positive integer.") - kwargs["word_dict"] = word_dict - return method(self, **kwargs) + type_check(word, (str,), "word") + type_check(word_id, (int,), "word_id") + check_value(word_id, (0, INT32_MAX), "word_id") + return method(self, *args, **kwargs) return new_method def check_jieba_init(method): - """Wrapper method to check the parameters of jieba add word.""" + """Wrapper method to check the parameters of jieba init.""" @wraps(method) def new_method(self, *args, **kwargs): - hmm_path, 
mp_path, model = (list(args) + 3 * [None])[:3] + [hmm_path, mp_path, _, with_offsets], _ = parse_user_args(method, *args, **kwargs) - if "hmm_path" in kwargs: - hmm_path = kwargs.get("hmm_path") - if "mp_path" in kwargs: - mp_path = kwargs.get("mp_path") if hmm_path is None: - raise ValueError( - "The dict of HMMSegment in cppjieba is not provided.") - kwargs["hmm_path"] = hmm_path + raise ValueError("The dict of HMMSegment in cppjieba is not provided.") + if not isinstance(hmm_path, str): + raise TypeError("Wrong input type for hmm_path, should be string.") if mp_path is None: - raise ValueError( - "The dict of MPSegment in cppjieba is not provided.") - kwargs["mp_path"] = mp_path - if model is not None: - kwargs["model"] = model - return method(self, **kwargs) + raise ValueError("The dict of MPSegment in cppjieba is not provided.") + if not isinstance(mp_path, str): + raise TypeError("Wrong input type for mp_path, should be string.") + if not isinstance(with_offsets, bool): + raise TypeError("Wrong input type for with_offsets, should be boolean.") + return method(self, *args, **kwargs) return new_method @@ -205,19 +143,12 @@ def check_jieba_add_word(method): @wraps(method) def new_method(self, *args, **kwargs): - word, freq = (list(args) + 2 * [None])[:2] - - if "word" in kwargs: - word = kwargs.get("word") - if "freq" in kwargs: - freq = kwargs.get("freq") + [word, freq], _ = parse_user_args(method, *args, **kwargs) if word is None: raise ValueError("word is not provided.") - kwargs["word"] = word if freq is not None: check_uint32(freq) - kwargs["freq"] = freq - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -227,104 +158,183 @@ def check_jieba_add_dict(method): @wraps(method) def new_method(self, *args, **kwargs): - user_dict = (list(args) + [None])[0] - if "user_dict" in kwargs: - user_dict = kwargs.get("user_dict") - if user_dict is None: - raise ValueError("user_dict is not provided.") - kwargs["user_dict"] = 
user_dict - return method(self, **kwargs) + parse_user_args(method, *args, **kwargs) + return method(self, *args, **kwargs) + + return new_method + + +def check_with_offsets(method): + """Wrapper method to check if with_offsets is the only one parameter.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + [with_offsets], _ = parse_user_args(method, *args, **kwargs) + if not isinstance(with_offsets, bool): + raise TypeError("Wrong input type for with_offsets, should be boolean.") + return method(self, *args, **kwargs) + + return new_method + + +def check_unicode_script_tokenizer(method): + """Wrapper method to check the parameter of UnicodeScriptTokenizer.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + [keep_whitespace, with_offsets], _ = parse_user_args(method, *args, **kwargs) + if not isinstance(keep_whitespace, bool): + raise TypeError("Wrong input type for keep_whitespace, should be boolean.") + if not isinstance(with_offsets, bool): + raise TypeError("Wrong input type for with_offsets, should be boolean.") + return method(self, *args, **kwargs) + + return new_method + + +def check_wordpiece_tokenizer(method): + """Wrapper method to check the parameter of WordpieceTokenizer.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + [vocab, suffix_indicator, max_bytes_per_token, unknown_token, with_offsets], _ = \ + parse_user_args(method, *args, **kwargs) + if vocab is None: + raise ValueError("vocab is not provided.") + if not isinstance(vocab, cde.Vocab): + raise TypeError("Wrong input type for vocab, should be Vocab object.") + if not isinstance(suffix_indicator, str): + raise TypeError("Wrong input type for suffix_indicator, should be string.") + if not isinstance(unknown_token, str): + raise TypeError("Wrong input type for unknown_token, should be string.") + if not isinstance(with_offsets, bool): + raise TypeError("Wrong input type for with_offsets, should be boolean.") + check_uint32(max_bytes_per_token) + return 
method(self, *args, **kwargs) + + return new_method + + +def check_regex_tokenizer(method): + """Wrapper method to check the parameter of RegexTokenizer.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + [delim_pattern, keep_delim_pattern, with_offsets], _ = parse_user_args(method, *args, **kwargs) + if delim_pattern is None: + raise ValueError("delim_pattern is not provided.") + if not isinstance(delim_pattern, str): + raise TypeError("Wrong input type for delim_pattern, should be string.") + if not isinstance(keep_delim_pattern, str): + raise TypeError("Wrong input type for keep_delim_pattern, should be string.") + if not isinstance(with_offsets, bool): + raise TypeError("Wrong input type for with_offsets, should be boolean.") + return method(self, *args, **kwargs) + + return new_method + + +def check_basic_tokenizer(method): + """Wrapper method to check the parameter of RegexTokenizer.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + [lower_case, keep_whitespace, _, preserve_unused, with_offsets], _ = \ + parse_user_args(method, *args, **kwargs) + if not isinstance(lower_case, bool): + raise TypeError("Wrong input type for lower_case, should be boolean.") + if not isinstance(keep_whitespace, bool): + raise TypeError("Wrong input type for keep_whitespace, should be boolean.") + if not isinstance(preserve_unused, bool): + raise TypeError("Wrong input type for preserve_unused_token, should be boolean.") + if not isinstance(with_offsets, bool): + raise TypeError("Wrong input type for with_offsets, should be boolean.") + return method(self, *args, **kwargs) + + return new_method + + +def check_bert_tokenizer(method): + """Wrapper method to check the parameter of BertTokenizer.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + [vocab, suffix_indicator, max_bytes_per_token, unknown_token, lower_case, keep_whitespace, _, + preserve_unused_token, with_offsets], _ = parse_user_args(method, *args, **kwargs) + if vocab is None: 
+ raise ValueError("vacab is not provided.") + if not isinstance(vocab, cde.Vocab): + raise TypeError("Wrong input type for vocab, should be Vocab object.") + if not isinstance(suffix_indicator, str): + raise TypeError("Wrong input type for suffix_indicator, should be string.") + if not isinstance(max_bytes_per_token, int): + raise TypeError("Wrong input type for max_bytes_per_token, should be int.") + check_uint32(max_bytes_per_token) + + if not isinstance(unknown_token, str): + raise TypeError("Wrong input type for unknown_token, should be string.") + if not isinstance(lower_case, bool): + raise TypeError("Wrong input type for lower_case, should be boolean.") + if not isinstance(keep_whitespace, bool): + raise TypeError("Wrong input type for keep_whitespace, should be boolean.") + if not isinstance(preserve_unused_token, bool): + raise TypeError("Wrong input type for preserve_unused_token, should be boolean.") + if not isinstance(with_offsets, bool): + raise TypeError("Wrong input type for with_offsets, should be boolean.") + return method(self, *args, **kwargs) return new_method def check_from_dataset(method): - """A wrapper that wrap a parameter checker to the original function.""" + """A wrapper that wraps a parameter checker to the original function.""" @wraps(method) def new_method(self, *args, **kwargs): - dataset, columns, freq_range, top_k, special_tokens, special_first = (list(args) + 6 * [None])[:6] - if "dataset" in kwargs: - dataset = kwargs.get("dataset") - if "columns" in kwargs: - columns = kwargs.get("columns") - if "freq_range" in kwargs: - freq_range = kwargs.get("freq_range") - if "top_k" in kwargs: - top_k = kwargs.get("top_k") - if "special_tokens" in kwargs: - special_tokens = kwargs.get("special_tokens") - if "special_first" in kwargs: - special_first = kwargs.get("special_first") + [_, columns, freq_range, top_k, special_tokens, special_first], _ = parse_user_args(method, *args, + **kwargs) + if columns is not None: + if not 
isinstance(columns, list): + columns = [columns] + col_names = ["col_{0}".format(i) for i in range(len(columns))] + type_check_list(columns, (str,), col_names) - if columns is None: - columns = [] + if freq_range is not None: + type_check(freq_range, (tuple,), "freq_range") - if not isinstance(columns, list): - columns = [columns] + if len(freq_range) != 2: + raise ValueError("freq_range needs to be a tuple of 2 integers or an int and a None.") - for column in columns: - if not isinstance(column, str): - raise ValueError("columns need to be a list of strings.") + for num in freq_range: + if num is not None and (not isinstance(num, int)): + raise ValueError( + "freq_range needs to be either None or a tuple of 2 integers or an int and a None.") - if freq_range is None: - freq_range = (None, None) + if isinstance(freq_range[0], int) and isinstance(freq_range[1], int): + if freq_range[0] > freq_range[1] or freq_range[0] < 0: + raise ValueError("frequency range [a,b] should be 0 <= a <= b (a,b are inclusive).") - if not isinstance(freq_range, tuple) or len(freq_range) != 2: - raise ValueError("freq_range needs to be either None or a tuple of 2 integers or an int and a None.") + type_check(top_k, (int, type(None)), "top_k") - for num in freq_range: - if num is not None and (not isinstance(num, int)): - raise ValueError("freq_range needs to be either None or a tuple of 2 integers or an int and a None.") + if isinstance(top_k, int): + check_positive(top_k, "top_k") + type_check(special_first, (bool,), "special_first") - if isinstance(freq_range[0], int) and isinstance(freq_range[1], int): - if freq_range[0] > freq_range[1] or freq_range[0] < 0: - raise ValueError("frequency range [a,b] should be 0 <= a <= b (a,b are inclusive).") + if special_tokens is not None: + check_unique_list_of_words(special_tokens, "special_tokens") - if top_k is not None and (not isinstance(top_k, int)): - raise ValueError("top_k needs to be a positive integer.") - - if isinstance(top_k, int) and 
top_k <= 0: - raise ValueError("top_k needs to be a positive integer.") - - if special_first is None: - special_first = True - - if special_tokens is None: - special_tokens = [] - - if not isinstance(special_first, bool): - raise ValueError("special_first needs to be a boolean value.") - - check_unique_list_of_words(special_tokens, "special_tokens") - - kwargs["dataset"] = dataset - kwargs["columns"] = columns - kwargs["freq_range"] = freq_range - kwargs["top_k"] = top_k - kwargs["special_tokens"] = special_tokens - kwargs["special_first"] = special_first - - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method def check_ngram(method): - """A wrapper that wrap a parameter checker to the original function.""" + """A wrapper that wraps a parameter checker to the original function.""" @wraps(method) def new_method(self, *args, **kwargs): - n, left_pad, right_pad, separator = (list(args) + 4 * [None])[:4] - if "n" in kwargs: - n = kwargs.get("n") - if "left_pad" in kwargs: - left_pad = kwargs.get("left_pad") - if "right_pad" in kwargs: - right_pad = kwargs.get("right_pad") - if "separator" in kwargs: - separator = kwargs.get("separator") + [n, left_pad, right_pad, separator], _ = parse_user_args(method, *args, **kwargs) if isinstance(n, int): n = [n] @@ -332,15 +342,9 @@ def check_ngram(method): if not (isinstance(n, list) and n != []): raise ValueError("n needs to be a non-empty list of positive integers.") - for gram in n: - if not (isinstance(gram, int) and gram > 0): - raise ValueError("n in ngram needs to be a positive number.") - - if left_pad is None: - left_pad = ("", 0) - - if right_pad is None: - right_pad = ("", 0) + for i, gram in enumerate(n): + type_check(gram, (int,), "gram[{0}]".format(i)) + check_positive(gram, "gram_{}".format(i)) if not (isinstance(left_pad, tuple) and len(left_pad) == 2 and isinstance(left_pad[0], str) and isinstance( left_pad[1], int)): @@ -353,11 +357,7 @@ def check_ngram(method): if not 
(left_pad[1] >= 0 and right_pad[1] >= 0): raise ValueError("padding width need to be positive numbers.") - if separator is None: - separator = " " - - if not isinstance(separator, str): - raise ValueError("separator needs to be a string.") + type_check(separator, (str,), "separator") kwargs["n"] = n kwargs["left_pad"] = left_pad @@ -374,16 +374,8 @@ def check_pair_truncate(method): @wraps(method) def new_method(self, *args, **kwargs): - max_length = (list(args) + [None])[0] - if "max_length" in kwargs: - max_length = kwargs.get("max_length") - if max_length is None: - raise ValueError("max_length is not provided.") - - check_pos_int64(max_length) - kwargs["max_length"] = max_length - - return method(self, **kwargs) + parse_user_args(method, *args, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -393,22 +385,13 @@ def check_to_number(method): @wraps(method) def new_method(self, *args, **kwargs): - data_type = (list(args) + [None])[0] - if "data_type" in kwargs: - data_type = kwargs.get("data_type") - - if data_type is None: - raise ValueError("data_type is a mandatory parameter but was not provided.") - - if not isinstance(data_type, typing.Type): - raise TypeError("data_type is not a MindSpore data type.") + [data_type], _ = parse_user_args(method, *args, **kwargs) + type_check(data_type, (typing.Type,), "data_type") if data_type not in mstype.number_type: raise TypeError("data_type is not numeric data type.") - kwargs["data_type"] = data_type - - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -418,18 +401,11 @@ def check_python_tokenizer(method): @wraps(method) def new_method(self, *args, **kwargs): - tokenizer = (list(args) + [None])[0] - if "tokenizer" in kwargs: - tokenizer = kwargs.get("tokenizer") - - if tokenizer is None: - raise ValueError("tokenizer is a mandatory parameter.") + [tokenizer], _ = parse_user_args(method, *args, **kwargs) if not callable(tokenizer): raise TypeError("tokenizer is 
not a callable python function") - kwargs["tokenizer"] = tokenizer - - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method diff --git a/mindspore/dataset/transforms/c_transforms.py b/mindspore/dataset/transforms/c_transforms.py index 48e986202c3..62496822e51 100644 --- a/mindspore/dataset/transforms/c_transforms.py +++ b/mindspore/dataset/transforms/c_transforms.py @@ -197,7 +197,7 @@ class PadEnd(cde.PadEndOp): class Concatenate(cde.ConcatenateOp): """ - Tensor operation to prepend and append to a tensor. + Tensor operation that concatenates all columns into a single tensor. Args: axis (int, optional): axis to concatenate the tensors along (Default=0). diff --git a/mindspore/dataset/transforms/validators.py b/mindspore/dataset/transforms/validators.py index 6b5760e0c5a..9fe0fa5f106 100644 --- a/mindspore/dataset/transforms/validators.py +++ b/mindspore/dataset/transforms/validators.py @@ -18,6 +18,7 @@ from functools import wraps import numpy as np from mindspore._c_expression import typing +from ..core.validator_helpers import parse_user_args, type_check, check_pos_int64, check_value, check_positive # POS_INT_MIN is used to limit values from starting from 0 POS_INT_MIN = 1 @@ -37,106 +38,33 @@ DOUBLE_MAX_INTEGER = 9007199254740992 DOUBLE_MIN_INTEGER = -9007199254740992 -def check_type(value, valid_type): - if not isinstance(value, valid_type): - raise ValueError("Wrong input type") - - -def check_value(value, valid_range): - if value < valid_range[0] or value > valid_range[1]: - raise ValueError("Input is not within the required range") - - -def check_range(values, valid_range): - if not valid_range[0] <= values[0] <= values[1] <= valid_range[1]: - raise ValueError("Input range is not valid") - - -def check_positive(value): - if value <= 0: - raise ValueError("Input must greater than 0") - - -def check_positive_float(value, valid_max=None): - if value <= 0 or not isinstance(value, float) or (valid_max is not None and value > 
valid_max): - raise ValueError("Input need to be a valid positive float.") - - -def check_bool(value): - if not isinstance(value, bool): - raise ValueError("Value needs to be a boolean.") - - -def check_2tuple(value): - if not (isinstance(value, tuple) and len(value) == 2): - raise ValueError("Value needs to be a 2-tuple.") - - -def check_list(value): - if not isinstance(value, list): - raise ValueError("The input needs to be a list.") - - -def check_uint8(value): - if not isinstance(value, int): - raise ValueError("The input needs to be a integer") - check_value(value, [UINT8_MIN, UINT8_MAX]) - - -def check_uint32(value): - if not isinstance(value, int): - raise ValueError("The input needs to be a integer") - check_value(value, [UINT32_MIN, UINT32_MAX]) - - -def check_pos_int32(value): - """Checks for int values starting from 1""" - if not isinstance(value, int): - raise ValueError("The input needs to be a integer") - check_value(value, [POS_INT_MIN, INT32_MAX]) - - -def check_uint64(value): - if not isinstance(value, int): - raise ValueError("The input needs to be a integer") - check_value(value, [UINT64_MIN, UINT64_MAX]) - - -def check_pos_int64(value): - if not isinstance(value, int): - raise ValueError("The input needs to be a integer") - check_value(value, [UINT64_MIN, INT64_MAX]) - - -def check_pos_float32(value): - check_value(value, [UINT32_MIN, FLOAT_MAX_INTEGER]) - - -def check_pos_float64(value): - check_value(value, [UINT64_MIN, DOUBLE_MAX_INTEGER]) - - -def check_one_hot_op(method): - """Wrapper method to check the parameters of one hot op.""" +def check_fill_value(method): + """Wrapper method to check the parameters of fill_value.""" @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 2 * [None])[:2] - num_classes, smoothing_rate = args - if "num_classes" in kwargs: - num_classes = kwargs.get("num_classes") - if "smoothing_rate" in kwargs: - smoothing_rate = kwargs.get("smoothing_rate") + [fill_value], _ = 
parse_user_args(method, *args, **kwargs) + type_check(fill_value, (str, float, bool, int, bytes), "fill_value") + + return method(self, *args, **kwargs) + + return new_method + + +def check_one_hot_op(method): + """Wrapper method to check the parameters of one_hot_op.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + [num_classes, smoothing_rate], _ = parse_user_args(method, *args, **kwargs) + + type_check(num_classes, (int,), "num_classes") + check_positive(num_classes) - if num_classes is None: - raise ValueError("num_classes") - check_pos_int32(num_classes) - kwargs["num_classes"] = num_classes if smoothing_rate is not None: - check_value(smoothing_rate, [0., 1.]) - kwargs["smoothing_rate"] = smoothing_rate + check_value(smoothing_rate, [0., 1.], "smoothing_rate") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -146,35 +74,12 @@ def check_num_classes(method): @wraps(method) def new_method(self, *args, **kwargs): - num_classes = (list(args) + [None])[0] - if "num_classes" in kwargs: - num_classes = kwargs.get("num_classes") - if num_classes is None: - raise ValueError("num_classes is not provided.") + [num_classes], _ = parse_user_args(method, *args, **kwargs) - check_pos_int32(num_classes) - kwargs["num_classes"] = num_classes + type_check(num_classes, (int,), "num_classes") + check_positive(num_classes) - return method(self, **kwargs) - - return new_method - - -def check_fill_value(method): - """Wrapper method to check the parameters of fill value.""" - - @wraps(method) - def new_method(self, *args, **kwargs): - fill_value = (list(args) + [None])[0] - if "fill_value" in kwargs: - fill_value = kwargs.get("fill_value") - if fill_value is None: - raise ValueError("fill_value is not provided.") - if not isinstance(fill_value, (str, float, bool, int, bytes)): - raise TypeError("fill_value must be either a primitive python str, float, bool, bytes or int") - kwargs["fill_value"] = fill_value - - return 
method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -184,17 +89,11 @@ def check_de_type(method): @wraps(method) def new_method(self, *args, **kwargs): - data_type = (list(args) + [None])[0] - if "data_type" in kwargs: - data_type = kwargs.get("data_type") + [data_type], _ = parse_user_args(method, *args, **kwargs) - if data_type is None: - raise ValueError("data_type is not provided.") - if not isinstance(data_type, typing.Type): - raise TypeError("data_type is not a MindSpore data type.") - kwargs["data_type"] = data_type + type_check(data_type, (typing.Type,), "data_type") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -204,13 +103,11 @@ def check_slice_op(method): @wraps(method) def new_method(self, *args): - for i, arg in enumerate(args): - if arg is not None and arg is not Ellipsis and not isinstance(arg, (int, slice, list)): - raise TypeError("Indexing of dim " + str(i) + "is not of valid type") + for _, arg in enumerate(args): + type_check(arg, (int, slice, list, type(None), type(Ellipsis)), "arg") if isinstance(arg, list): for a in arg: - if not isinstance(a, int): - raise TypeError("Index " + a + " is not an int") + type_check(a, (int,), "a") return method(self, *args) return new_method @@ -221,36 +118,14 @@ def check_mask_op(method): @wraps(method) def new_method(self, *args, **kwargs): - operator, constant, dtype = (list(args) + 3 * [None])[:3] - if "operator" in kwargs: - operator = kwargs.get("operator") - if "constant" in kwargs: - constant = kwargs.get("constant") - if "dtype" in kwargs: - dtype = kwargs.get("dtype") - - if operator is None: - raise ValueError("operator is not provided.") - - if constant is None: - raise ValueError("constant is not provided.") + [operator, constant, dtype], _ = parse_user_args(method, *args, **kwargs) from .c_transforms import Relational - if not isinstance(operator, Relational): - raise TypeError("operator is not a Relational operator 
enum.") + type_check(operator, (Relational,), "operator") + type_check(constant, (str, float, bool, int, bytes), "constant") + type_check(dtype, (typing.Type,), "dtype") - if not isinstance(constant, (str, float, bool, int, bytes)): - raise TypeError("constant must be either a primitive python str, float, bool, bytes or int") - - if dtype is not None: - if not isinstance(dtype, typing.Type): - raise TypeError("dtype is not a MindSpore data type.") - kwargs["dtype"] = dtype - - kwargs["operator"] = operator - kwargs["constant"] = constant - - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -260,22 +135,12 @@ def check_pad_end(method): @wraps(method) def new_method(self, *args, **kwargs): - pad_shape, pad_value = (list(args) + 2 * [None])[:2] - if "pad_shape" in kwargs: - pad_shape = kwargs.get("pad_shape") - if "pad_value" in kwargs: - pad_value = kwargs.get("pad_value") - if pad_shape is None: - raise ValueError("pad_shape is not provided.") + [pad_shape, pad_value], _ = parse_user_args(method, *args, **kwargs) if pad_value is not None: - if not isinstance(pad_value, (str, float, bool, int, bytes)): - raise TypeError("pad_value must be either a primitive python str, float, bool, int or bytes") - kwargs["pad_value"] = pad_value - - if not isinstance(pad_shape, list): - raise TypeError("pad_shape must be a list") + type_check(pad_value, (str, float, bool, int, bytes), "pad_value") + type_check(pad_shape, (list,), "pad_end") for dim in pad_shape: if dim is not None: @@ -284,9 +149,7 @@ def check_pad_end(method): else: raise TypeError("a value in the list is not an integer.") - kwargs["pad_shape"] = pad_shape - - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -296,31 +159,24 @@ def check_concat_type(method): @wraps(method) def new_method(self, *args, **kwargs): - axis, prepend, append = (list(args) + 3 * [None])[:3] - if "prepend" in kwargs: - prepend = kwargs.get("prepend") - if 
"append" in kwargs: - append = kwargs.get("append") - if "axis" in kwargs: - axis = kwargs.get("axis") + + [axis, prepend, append], _ = parse_user_args(method, *args, **kwargs) if axis is not None: - if not isinstance(axis, int): - raise TypeError("axis type is not valid, must be an integer.") + type_check(axis, (int,), "axis") if axis not in (0, -1): raise ValueError("only 1D concatenation supported.") - kwargs["axis"] = axis if prepend is not None: - if not isinstance(prepend, (type(None), np.ndarray)): - raise ValueError("prepend type is not valid, must be None for no prepend tensor or a numpy array.") - kwargs["prepend"] = prepend + type_check(prepend, (np.ndarray,), "prepend") + if len(prepend.shape) != 1: + raise ValueError("can only prepend 1D arrays.") if append is not None: - if not isinstance(append, (type(None), np.ndarray)): - raise ValueError("append type is not valid, must be None for no append tensor or a numpy array.") - kwargs["append"] = append + type_check(append, (np.ndarray,), "append") + if len(append.shape) != 1: + raise ValueError("can only append 1D arrays.") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method diff --git a/mindspore/dataset/transforms/vision/c_transforms.py b/mindspore/dataset/transforms/vision/c_transforms.py index 43ac037541e..8e3b7c72141 100644 --- a/mindspore/dataset/transforms/vision/c_transforms.py +++ b/mindspore/dataset/transforms/vision/c_transforms.py @@ -40,12 +40,14 @@ Examples: >>> dataset = dataset.map(input_columns="image", operations=transforms_list) >>> dataset = dataset.map(input_columns="label", operations=onehot_op) """ +import numbers import mindspore._c_dataengine as cde from .utils import Inter, Border from .validators import check_prob, check_crop, check_resize_interpolation, check_random_resize_crop, \ - check_normalize_c, check_random_crop, check_random_color_adjust, check_random_rotation, \ - check_resize, check_rescale, check_pad, check_cutout, 
check_uniform_augment_cpp, check_bounding_box_augment_cpp + check_normalize_c, check_random_crop, check_random_color_adjust, check_random_rotation, check_range, \ + check_resize, check_rescale, check_pad, check_cutout, check_uniform_augment_cpp, check_bounding_box_augment_cpp, \ + FLOAT_MAX_INTEGER DE_C_INTER_MODE = {Inter.NEAREST: cde.InterpolationMode.DE_INTER_NEAREST_NEIGHBOUR, Inter.LINEAR: cde.InterpolationMode.DE_INTER_LINEAR, @@ -57,6 +59,18 @@ DE_C_BORDER_TYPE = {Border.CONSTANT: cde.BorderType.DE_BORDER_CONSTANT, Border.SYMMETRIC: cde.BorderType.DE_BORDER_SYMMETRIC} +def parse_padding(padding): + if isinstance(padding, numbers.Number): + padding = [padding] * 4 + if len(padding) == 2: + left = right = padding[0] + top = bottom = padding[1] + padding = (left, top, right, bottom,) + if isinstance(padding, list): + padding = tuple(padding) + return padding + + class Decode(cde.DecodeOp): """ Decode the input image in RGB mode. @@ -136,16 +150,22 @@ class RandomCrop(cde.RandomCropOp): @check_random_crop def __init__(self, size, padding=None, pad_if_needed=False, fill_value=0, padding_mode=Border.CONSTANT): + if isinstance(size, int): + size = (size, size) + if padding is None: + padding = (0, 0, 0, 0) + else: + padding = parse_padding(padding) + if isinstance(fill_value, int): # temporary fix + fill_value = tuple([fill_value] * 3) + border_type = DE_C_BORDER_TYPE[padding_mode] + self.size = size self.padding = padding self.pad_if_needed = pad_if_needed self.fill_value = fill_value self.padding_mode = padding_mode.value - if padding is None: - padding = (0, 0, 0, 0) - if isinstance(fill_value, int): # temporary fix - fill_value = tuple([fill_value] * 3) - border_type = DE_C_BORDER_TYPE[padding_mode] + super().__init__(*size, *padding, border_type, pad_if_needed, *fill_value) @@ -184,16 +204,23 @@ class RandomCropWithBBox(cde.RandomCropWithBBoxOp): @check_random_crop def __init__(self, size, padding=None, pad_if_needed=False, fill_value=0, 
padding_mode=Border.CONSTANT): + if isinstance(size, int): + size = (size, size) + if padding is None: + padding = (0, 0, 0, 0) + else: + padding = parse_padding(padding) + + if isinstance(fill_value, int): # temporary fix + fill_value = tuple([fill_value] * 3) + border_type = DE_C_BORDER_TYPE[padding_mode] + self.size = size self.padding = padding self.pad_if_needed = pad_if_needed self.fill_value = fill_value self.padding_mode = padding_mode.value - if padding is None: - padding = (0, 0, 0, 0) - if isinstance(fill_value, int): # temporary fix - fill_value = tuple([fill_value] * 3) - border_type = DE_C_BORDER_TYPE[padding_mode] + super().__init__(*size, *padding, border_type, pad_if_needed, *fill_value) @@ -292,6 +319,8 @@ class Resize(cde.ResizeOp): @check_resize_interpolation def __init__(self, size, interpolation=Inter.LINEAR): + if isinstance(size, int): + size = (size, size) self.size = size self.interpolation = interpolation interpoltn = DE_C_INTER_MODE[interpolation] @@ -359,6 +388,8 @@ class RandomResizedCropWithBBox(cde.RandomCropAndResizeWithBBoxOp): @check_random_resize_crop def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolation=Inter.BILINEAR, max_attempts=10): + if isinstance(size, int): + size = (size, size) self.size = size self.scale = scale self.ratio = ratio @@ -396,6 +427,8 @@ class RandomResizedCrop(cde.RandomCropAndResizeOp): @check_random_resize_crop def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. 
/ 3.), interpolation=Inter.BILINEAR, max_attempts=10): + if isinstance(size, int): + size = (size, size) self.size = size self.scale = scale self.ratio = ratio @@ -417,6 +450,8 @@ class CenterCrop(cde.CenterCropOp): @check_crop def __init__(self, size): + if isinstance(size, int): + size = (size, size) self.size = size super().__init__(*size) @@ -442,12 +477,26 @@ class RandomColorAdjust(cde.RandomColorAdjustOp): @check_random_color_adjust def __init__(self, brightness=(1, 1), contrast=(1, 1), saturation=(1, 1), hue=(0, 0)): + brightness = self.expand_values(brightness) + contrast = self.expand_values(contrast) + saturation = self.expand_values(saturation) + hue = self.expand_values(hue, center=0, bound=(-0.5, 0.5), non_negative=False) + self.brightness = brightness self.contrast = contrast self.saturation = saturation self.hue = hue + super().__init__(*brightness, *contrast, *saturation, *hue) + def expand_values(self, value, center=1, bound=(0, FLOAT_MAX_INTEGER), non_negative=True): + if isinstance(value, numbers.Number): + value = [center - value, center + value] + if non_negative: + value[0] = max(0, value[0]) + check_range(value, bound) + return (value[0], value[1]) + class RandomRotation(cde.RandomRotationOp): """ @@ -485,6 +534,8 @@ class RandomRotation(cde.RandomRotationOp): self.expand = expand self.center = center self.fill_value = fill_value + if isinstance(degrees, numbers.Number): + degrees = (-degrees, degrees) if center is None: center = (-1, -1) if isinstance(fill_value, int): # temporary fix @@ -584,6 +635,8 @@ class RandomCropDecodeResize(cde.RandomCropDecodeResizeOp): @check_random_resize_crop def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. 
/ 3.), interpolation=Inter.BILINEAR, max_attempts=10): + if isinstance(size, int): + size = (size, size) self.size = size self.scale = scale self.ratio = ratio @@ -623,12 +676,14 @@ class Pad(cde.PadOp): @check_pad def __init__(self, padding, fill_value=0, padding_mode=Border.CONSTANT): - self.padding = padding - self.fill_value = fill_value - self.padding_mode = padding_mode + padding = parse_padding(padding) if isinstance(fill_value, int): # temporary fix fill_value = tuple([fill_value] * 3) padding_mode = DE_C_BORDER_TYPE[padding_mode] + + self.padding = padding + self.fill_value = fill_value + self.padding_mode = padding_mode super().__init__(*padding, padding_mode, *fill_value) diff --git a/mindspore/dataset/transforms/vision/py_transforms.py b/mindspore/dataset/transforms/vision/py_transforms.py index b252c3434b9..3bfd6b0644f 100644 --- a/mindspore/dataset/transforms/vision/py_transforms.py +++ b/mindspore/dataset/transforms/vision/py_transforms.py @@ -28,6 +28,7 @@ import numpy as np from PIL import Image from . 
import py_transforms_util as util +from .c_transforms import parse_padding from .validators import check_prob, check_crop, check_resize_interpolation, check_random_resize_crop, \ check_normalize_py, check_random_crop, check_random_color_adjust, check_random_rotation, \ check_transforms_list, check_random_apply, check_ten_crop, check_num_channels, check_pad, \ @@ -295,6 +296,10 @@ class RandomCrop: @check_random_crop def __init__(self, size, padding=None, pad_if_needed=False, fill_value=0, padding_mode=Border.CONSTANT): + if padding is None: + padding = (0, 0, 0, 0) + else: + padding = parse_padding(padding) self.size = size self.padding = padding self.pad_if_needed = pad_if_needed @@ -753,6 +758,8 @@ class TenCrop: @check_ten_crop def __init__(self, size, use_vertical_flip=False): + if isinstance(size, int): + size = (size, size) self.size = size self.use_vertical_flip = use_vertical_flip @@ -877,6 +884,8 @@ class Pad: @check_pad def __init__(self, padding, fill_value=0, padding_mode=Border.CONSTANT): + parse_padding(padding) + self.padding = padding self.fill_value = fill_value self.padding_mode = DE_PY_BORDER_TYPE[padding_mode] @@ -1129,56 +1138,23 @@ class RandomAffine: def __init__(self, degrees, translate=None, scale=None, shear=None, resample=Inter.NEAREST, fill_value=0): # Parameter checking # rotation - if isinstance(degrees, numbers.Number): - if degrees < 0: - raise ValueError("If degrees is a single number, it must be positive.") - self.degrees = (-degrees, degrees) - elif isinstance(degrees, (tuple, list)) and len(degrees) == 2: - self.degrees = degrees - else: - raise TypeError("If degrees is a list or tuple, it must be of length 2.") - - # translation - if translate is not None: - if isinstance(translate, (tuple, list)) and len(translate) == 2: - for t in translate: - if t < 0.0 or t > 1.0: - raise ValueError("translation values should be between 0 and 1") - else: - raise TypeError("translate should be a list or tuple of length 2.") - self.translate = 
translate - - # scale - if scale is not None: - if isinstance(scale, (tuple, list)) and len(scale) == 2: - for s in scale: - if s <= 0: - raise ValueError("scale values should be positive") - else: - raise TypeError("scale should be a list or tuple of length 2.") - self.scale_ranges = scale - - # shear if shear is not None: if isinstance(shear, numbers.Number): - if shear < 0: - raise ValueError("If shear is a single number, it must be positive.") - self.shear = (-1 * shear, shear) - elif isinstance(shear, (tuple, list)) and (len(shear) == 2 or len(shear) == 4): - # X-Axis shear with [min, max] - if len(shear) == 2: - self.shear = [shear[0], shear[1], 0., 0.] - elif len(shear) == 4: - self.shear = [s for s in shear] + shear = (-1 * shear, shear) else: - raise TypeError("shear should be a list or tuple and it must be of length 2 or 4.") - else: - self.shear = shear + if len(shear) == 2: + shear = [shear[0], shear[1], 0., 0.] + elif len(shear) == 4: + shear = [s for s in shear] - # resample + if isinstance(degrees, numbers.Number): + degrees = (-degrees, degrees) + + self.degrees = degrees + self.translate = translate + self.scale_ranges = scale + self.shear = shear self.resample = DE_PY_INTER_MODE[resample] - - # fill_value self.fill_value = fill_value def __call__(self, img): diff --git a/mindspore/dataset/transforms/vision/validators.py b/mindspore/dataset/transforms/vision/validators.py index b49116349bb..4cb66133592 100644 --- a/mindspore/dataset/transforms/vision/validators.py +++ b/mindspore/dataset/transforms/vision/validators.py @@ -16,47 +16,35 @@ """ import numbers from functools import wraps - +import numpy as np from mindspore._c_dataengine import TensorOp from .utils import Inter, Border -from ...transforms.validators import check_pos_int32, check_pos_float32, check_value, check_uint8, FLOAT_MAX_INTEGER, \ - check_bool, check_2tuple, check_range, check_list, check_type, check_positive, INT32_MAX - - -def check_inter_mode(mode): - if not isinstance(mode, 
Inter): - raise ValueError("Invalid interpolation mode.") - - -def check_border_type(mode): - if not isinstance(mode, Border): - raise ValueError("Invalid padding mode.") +from ...core.validator_helpers import check_value, check_uint8, FLOAT_MAX_INTEGER, check_pos_float32, \ + check_2tuple, check_range, check_positive, INT32_MAX, parse_user_args, type_check, type_check_list def check_crop_size(size): """Wrapper method to check the parameters of crop size.""" + type_check(size, (int, list, tuple), "size") if isinstance(size, int): - size = (size, size) + check_value(size, (1, FLOAT_MAX_INTEGER)) elif isinstance(size, (tuple, list)) and len(size) == 2: - size = size + for value in size: + check_value(value, (1, FLOAT_MAX_INTEGER)) else: raise TypeError("Size should be a single integer or a list/tuple (h, w) of length 2.") - for value in size: - check_pos_int32(value) - return size def check_resize_size(size): """Wrapper method to check the parameters of resize.""" if isinstance(size, int): - check_pos_int32(size) + check_value(size, (1, FLOAT_MAX_INTEGER)) elif isinstance(size, (tuple, list)) and len(size) == 2: - for value in size: - check_value(value, (1, INT32_MAX)) + for i, value in enumerate(size): + check_value(value, (1, INT32_MAX), "size at dim {0}".format(i)) else: raise TypeError("Size should be a single integer or a list/tuple (h, w) of length 2.") - return size def check_normalize_c_param(mean, std): @@ -72,9 +60,9 @@ def check_normalize_py_param(mean, std): if len(mean) != len(std): raise ValueError("Length of mean and std must be equal") for mean_value in mean: - check_value(mean_value, [0., 1.]) + check_value(mean_value, [0., 1.], "mean_value") for std_value in std: - check_value(std_value, [0., 1.]) + check_value(std_value, [0., 1.], "std_value") def check_fill_value(fill_value): @@ -85,66 +73,37 @@ def check_fill_value(fill_value): check_uint8(value) else: raise TypeError("fill_value should be a single integer or a 3-tuple.") - return fill_value def 
check_padding(padding): """Parsing the padding arguments and check if it is legal.""" - if isinstance(padding, numbers.Number): - top = bottom = left = right = padding - - elif isinstance(padding, (tuple, list)): - if len(padding) == 2: - left = right = padding[0] - top = bottom = padding[1] - elif len(padding) == 4: - left = padding[0] - top = padding[1] - right = padding[2] - bottom = padding[3] - else: + type_check(padding, (tuple, list, numbers.Number), "padding") + if isinstance(padding, (tuple, list)): + if len(padding) not in (2, 4): raise ValueError("The size of the padding list or tuple should be 2 or 4.") - else: - raise TypeError("Padding can be any of: a number, a tuple or list of size 2 or 4.") - if not (isinstance(left, int) and isinstance(top, int) and isinstance(right, int) and isinstance(bottom, int)): - raise TypeError("Padding value should be integer.") - if left < 0 or top < 0 or right < 0 or bottom < 0: - raise ValueError("Padding value could not be negative.") - return left, top, right, bottom + for i, pad_value in enumerate(padding): + type_check(pad_value, (int,), "padding[{}]".format(i)) + check_value(pad_value, (0, INT32_MAX), "pad_value") def check_degrees(degrees): """Check if the degrees is legal.""" + type_check(degrees, (numbers.Number, list, tuple), "degrees") if isinstance(degrees, numbers.Number): - if degrees < 0: - raise ValueError("If degrees is a single number, it cannot be negative.") - degrees = (-degrees, degrees) + check_value(degrees, (0, float("inf")), "degrees") elif isinstance(degrees, (list, tuple)): if len(degrees) != 2: raise TypeError("If degrees is a sequence, the length must be 2.") - else: - raise TypeError("Degrees must be a single non-negative number or a sequence") - return degrees def check_random_color_adjust_param(value, input_name, center=1, bound=(0, FLOAT_MAX_INTEGER), non_negative=True): """Check the parameters in random color adjust operation.""" + type_check(value, (numbers.Number, list, tuple), 
input_name) if isinstance(value, numbers.Number): if value < 0: raise ValueError("The input value of {} cannot be negative.".format(input_name)) - # convert value into a range - value = [center - value, center + value] - if non_negative: - value[0] = max(0, value[0]) elif isinstance(value, (list, tuple)) and len(value) == 2: - if not bound[0] <= value[0] <= value[1] <= bound[1]: - raise ValueError("Please check your value range of {} is valid and " - "within the bound {}".format(input_name, bound)) - else: - raise TypeError("Input of {} should be either a single value, or a list/tuple of " - "length 2.".format(input_name)) - factor = (value[0], value[1]) - return factor + check_range(value, bound) def check_erasing_value(value): @@ -155,173 +114,105 @@ def check_erasing_value(value): def check_crop(method): - """A wrapper that wrap a parameter checker to the original function(crop operation).""" + """A wrapper that wraps a parameter checker to the original function(crop operation).""" @wraps(method) def new_method(self, *args, **kwargs): - size = (list(args) + [None])[0] - if "size" in kwargs: - size = kwargs.get("size") - if size is None: - raise ValueError("size is not provided.") - size = check_crop_size(size) - kwargs["size"] = size + [size], _ = parse_user_args(method, *args, **kwargs) + check_crop_size(size) - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method def check_resize_interpolation(method): - """A wrapper that wrap a parameter checker to the original function(resize interpolation operation).""" + """A wrapper that wraps a parameter checker to the original function(resize interpolation operation).""" @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 2 * [None])[:2] - size, interpolation = args - if "size" in kwargs: - size = kwargs.get("size") - if "interpolation" in kwargs: - interpolation = kwargs.get("interpolation") - - if size is None: - raise ValueError("size is not provided.") - 
size = check_resize_size(size) - kwargs["size"] = size - + [size, interpolation], _ = parse_user_args(method, *args, **kwargs) + check_resize_size(size) if interpolation is not None: - check_inter_mode(interpolation) - kwargs["interpolation"] = interpolation + type_check(interpolation, (Inter,), "interpolation") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method def check_resize(method): - """A wrapper that wrap a parameter checker to the original function(resize operation).""" + """A wrapper that wraps a parameter checker to the original function(resize operation).""" @wraps(method) def new_method(self, *args, **kwargs): - size = (list(args) + [None])[0] - if "size" in kwargs: - size = kwargs.get("size") + [size], _ = parse_user_args(method, *args, **kwargs) + check_resize_size(size) - if size is None: - raise ValueError("size is not provided.") - size = check_resize_size(size) - kwargs["size"] = size - - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method def check_random_resize_crop(method): - """A wrapper that wrap a parameter checker to the original function(random resize crop operation).""" + """A wrapper that wraps a parameter checker to the original function(random resize crop operation).""" @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 5 * [None])[:5] - size, scale, ratio, interpolation, max_attempts = args - if "size" in kwargs: - size = kwargs.get("size") - if "scale" in kwargs: - scale = kwargs.get("scale") - if "ratio" in kwargs: - ratio = kwargs.get("ratio") - if "interpolation" in kwargs: - interpolation = kwargs.get("interpolation") - if "max_attempts" in kwargs: - max_attempts = kwargs.get("max_attempts") - - if size is None: - raise ValueError("size is not provided.") - size = check_crop_size(size) - kwargs["size"] = size + [size, scale, ratio, interpolation, max_attempts], _ = parse_user_args(method, *args, **kwargs) + check_crop_size(size) if 
scale is not None: check_range(scale, [0, FLOAT_MAX_INTEGER]) - kwargs["scale"] = scale if ratio is not None: check_range(ratio, [0, FLOAT_MAX_INTEGER]) - check_positive(ratio[0]) - kwargs["ratio"] = ratio + check_positive(ratio[0], "ratio[0]") if interpolation is not None: - check_inter_mode(interpolation) - kwargs["interpolation"] = interpolation + type_check(interpolation, (Inter,), "interpolation") if max_attempts is not None: - check_pos_int32(max_attempts) - kwargs["max_attempts"] = max_attempts + check_value(max_attempts, (1, FLOAT_MAX_INTEGER)) - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method def check_prob(method): - """A wrapper that wrap a parameter checker(check the probability) to the original function.""" + """A wrapper that wraps a parameter checker(check the probability) to the original function.""" @wraps(method) def new_method(self, *args, **kwargs): - prob = (list(args) + [None])[0] - if "prob" in kwargs: - prob = kwargs.get("prob") - if prob is not None: - check_value(prob, [0., 1.]) - kwargs["prob"] = prob + [prob], _ = parse_user_args(method, *args, **kwargs) + type_check(prob, (float, int,), "prob") + check_value(prob, [0., 1.], "prob") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method def check_normalize_c(method): - """A wrapper that wrap a parameter checker to the original function(normalize operation written in C++).""" + """A wrapper that wraps a parameter checker to the original function(normalize operation written in C++).""" @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 2 * [None])[:2] - mean, std = args - if "mean" in kwargs: - mean = kwargs.get("mean") - if "std" in kwargs: - std = kwargs.get("std") - - if mean is None: - raise ValueError("mean is not provided.") - if std is None: - raise ValueError("std is not provided.") + [mean, std], _ = parse_user_args(method, *args, **kwargs) check_normalize_c_param(mean, std) - 
kwargs["mean"] = mean - kwargs["std"] = std - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method def check_normalize_py(method): - """A wrapper that wrap a parameter checker to the original function(normalize operation written in Python).""" + """A wrapper that wraps a parameter checker to the original function(normalize operation written in Python).""" @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 2 * [None])[:2] - mean, std = args - if "mean" in kwargs: - mean = kwargs.get("mean") - if "std" in kwargs: - std = kwargs.get("std") - - if mean is None: - raise ValueError("mean is not provided.") - if std is None: - raise ValueError("std is not provided.") + [mean, std], _ = parse_user_args(method, *args, **kwargs) check_normalize_py_param(mean, std) - kwargs["mean"] = mean - kwargs["std"] = std - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -331,38 +222,17 @@ def check_random_crop(method): @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 5 * [None])[:5] - size, padding, pad_if_needed, fill_value, padding_mode = args - - if "size" in kwargs: - size = kwargs.get("size") - if "padding" in kwargs: - padding = kwargs.get("padding") - if "fill_value" in kwargs: - fill_value = kwargs.get("fill_value") - if "padding_mode" in kwargs: - padding_mode = kwargs.get("padding_mode") - if "pad_if_needed" in kwargs: - pad_if_needed = kwargs.get("pad_if_needed") - - if size is None: - raise ValueError("size is not provided.") - size = check_crop_size(size) - kwargs["size"] = size - + [size, padding, pad_if_needed, fill_value, padding_mode], _ = parse_user_args(method, *args, **kwargs) + check_crop_size(size) + type_check(pad_if_needed, (bool,), "pad_if_needed") if padding is not None: - padding = check_padding(padding) - kwargs["padding"] = padding + check_padding(padding) if fill_value is not None: - fill_value = check_fill_value(fill_value) - 
kwargs["fill_value"] = fill_value + check_fill_value(fill_value) if padding_mode is not None: - check_border_type(padding_mode) - kwargs["padding_mode"] = padding_mode - if pad_if_needed is not None: - kwargs["pad_if_needed"] = pad_if_needed + type_check(padding_mode, (Border,), "padding_mode") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -372,27 +242,13 @@ def check_random_color_adjust(method): @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 4 * [None])[:4] - brightness, contrast, saturation, hue = args - if "brightness" in kwargs: - brightness = kwargs.get("brightness") - if "contrast" in kwargs: - contrast = kwargs.get("contrast") - if "saturation" in kwargs: - saturation = kwargs.get("saturation") - if "hue" in kwargs: - hue = kwargs.get("hue") + [brightness, contrast, saturation, hue], _ = parse_user_args(method, *args, **kwargs) + check_random_color_adjust_param(brightness, "brightness") + check_random_color_adjust_param(contrast, "contrast") + check_random_color_adjust_param(saturation, "saturation") + check_random_color_adjust_param(hue, 'hue', center=0, bound=(-0.5, 0.5), non_negative=False) - if brightness is not None: - kwargs["brightness"] = check_random_color_adjust_param(brightness, "brightness") - if contrast is not None: - kwargs["contrast"] = check_random_color_adjust_param(contrast, "contrast") - if saturation is not None: - kwargs["saturation"] = check_random_color_adjust_param(saturation, "saturation") - if hue is not None: - kwargs["hue"] = check_random_color_adjust_param(hue, 'hue', center=0, bound=(-0.5, 0.5), non_negative=False) - - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -402,38 +258,19 @@ def check_random_rotation(method): @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 5 * [None])[:5] - degrees, resample, expand, center, fill_value = args - if "degrees" in kwargs: - degrees = 
kwargs.get("degrees") - if "resample" in kwargs: - resample = kwargs.get("resample") - if "expand" in kwargs: - expand = kwargs.get("expand") - if "center" in kwargs: - center = kwargs.get("center") - if "fill_value" in kwargs: - fill_value = kwargs.get("fill_value") - - if degrees is None: - raise ValueError("degrees is not provided.") - degrees = check_degrees(degrees) - kwargs["degrees"] = degrees + [degrees, resample, expand, center, fill_value], _ = parse_user_args(method, *args, **kwargs) + check_degrees(degrees) if resample is not None: - check_inter_mode(resample) - kwargs["resample"] = resample + type_check(resample, (Inter,), "resample") if expand is not None: - check_bool(expand) - kwargs["expand"] = expand + type_check(expand, (bool,), "expand") if center is not None: - check_2tuple(center) - kwargs["center"] = center + check_2tuple(center, "center") if fill_value is not None: - fill_value = check_fill_value(fill_value) - kwargs["fill_value"] = fill_value + check_fill_value(fill_value) - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -443,16 +280,11 @@ def check_transforms_list(method): @wraps(method) def new_method(self, *args, **kwargs): - transforms = (list(args) + [None])[0] - if "transforms" in kwargs: - transforms = kwargs.get("transforms") - if transforms is None: - raise ValueError("transforms is not provided.") + [transforms], _ = parse_user_args(method, *args, **kwargs) - check_list(transforms) - kwargs["transforms"] = transforms + type_check(transforms, (list,), "transforms") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -462,21 +294,14 @@ def check_random_apply(method): @wraps(method) def new_method(self, *args, **kwargs): - transforms, prob = (list(args) + 2 * [None])[:2] - if "transforms" in kwargs: - transforms = kwargs.get("transforms") - if transforms is None: - raise ValueError("transforms is not provided.") - check_list(transforms) - 
kwargs["transforms"] = transforms + [transforms, prob], _ = parse_user_args(method, *args, **kwargs) + type_check(transforms, (list,), "transforms") - if "prob" in kwargs: - prob = kwargs.get("prob") if prob is not None: - check_value(prob, [0., 1.]) - kwargs["prob"] = prob + type_check(prob, (float, int,), "prob") + check_value(prob, [0., 1.], "prob") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -486,23 +311,13 @@ def check_ten_crop(method): @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 2 * [None])[:2] - size, use_vertical_flip = args - if "size" in kwargs: - size = kwargs.get("size") - if "use_vertical_flip" in kwargs: - use_vertical_flip = kwargs.get("use_vertical_flip") - - if size is None: - raise ValueError("size is not provided.") - size = check_crop_size(size) - kwargs["size"] = size + [size, use_vertical_flip], _ = parse_user_args(method, *args, **kwargs) + check_crop_size(size) if use_vertical_flip is not None: - check_bool(use_vertical_flip) - kwargs["use_vertical_flip"] = use_vertical_flip + type_check(use_vertical_flip, (bool,), "use_vertical_flip") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -512,16 +327,13 @@ def check_num_channels(method): @wraps(method) def new_method(self, *args, **kwargs): - num_output_channels = (list(args) + [None])[0] - if "num_output_channels" in kwargs: - num_output_channels = kwargs.get("num_output_channels") + [num_output_channels], _ = parse_user_args(method, *args, **kwargs) if num_output_channels is not None: if num_output_channels not in (1, 3): raise ValueError("Number of channels of the output grayscale image" "should be either 1 or 3. 
Got {0}".format(num_output_channels)) - kwargs["num_output_channels"] = num_output_channels - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -531,28 +343,12 @@ def check_pad(method): @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 3 * [None])[:3] - padding, fill_value, padding_mode = args - if "padding" in kwargs: - padding = kwargs.get("padding") - if "fill_value" in kwargs: - fill_value = kwargs.get("fill_value") - if "padding_mode" in kwargs: - padding_mode = kwargs.get("padding_mode") + [padding, fill_value, padding_mode], _ = parse_user_args(method, *args, **kwargs) + check_padding(padding) + check_fill_value(fill_value) + type_check(padding_mode, (Border,), "padding_mode") - if padding is None: - raise ValueError("padding is not provided.") - padding = check_padding(padding) - kwargs["padding"] = padding - - if fill_value is not None: - fill_value = check_fill_value(fill_value) - kwargs["fill_value"] = fill_value - if padding_mode is not None: - check_border_type(padding_mode) - kwargs["padding_mode"] = padding_mode - - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -562,26 +358,13 @@ def check_random_perspective(method): @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 3 * [None])[:3] - distortion_scale, prob, interpolation = args - if "distortion_scale" in kwargs: - distortion_scale = kwargs.get("distortion_scale") - if "prob" in kwargs: - prob = kwargs.get("prob") - if "interpolation" in kwargs: - interpolation = kwargs.get("interpolation") + [distortion_scale, prob, interpolation], _ = parse_user_args(method, *args, **kwargs) - if distortion_scale is not None: - check_value(distortion_scale, [0., 1.]) - kwargs["distortion_scale"] = distortion_scale - if prob is not None: - check_value(prob, [0., 1.]) - kwargs["prob"] = prob - if interpolation is not None: - check_inter_mode(interpolation) - kwargs["interpolation"] 
= interpolation + check_value(distortion_scale, [0., 1.], "distortion_scale") + check_value(prob, [0., 1.], "prob") + type_check(interpolation, (Inter,), "interpolation") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -591,28 +374,13 @@ def check_mix_up(method): @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 3 * [None])[:3] - batch_size, alpha, is_single = args - if "batch_size" in kwargs: - batch_size = kwargs.get("batch_size") - if "alpha" in kwargs: - alpha = kwargs.get("alpha") - if "is_single" in kwargs: - is_single = kwargs.get("is_single") + [batch_size, alpha, is_single], _ = parse_user_args(method, *args, **kwargs) - if batch_size is None: - raise ValueError("batch_size") - check_pos_int32(batch_size) - kwargs["batch_size"] = batch_size - if alpha is None: - raise ValueError("alpha") - check_positive(alpha) - kwargs["alpha"] = alpha - if is_single is not None: - check_type(is_single, bool) - kwargs["is_single"] = is_single + check_value(batch_size, (1, FLOAT_MAX_INTEGER)) + check_positive(alpha, "alpha") + type_check(is_single, (bool,), "is_single") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -622,41 +390,16 @@ def check_random_erasing(method): @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 6 * [None])[:6] - prob, scale, ratio, value, inplace, max_attempts = args - if "prob" in kwargs: - prob = kwargs.get("prob") - if "scale" in kwargs: - scale = kwargs.get("scale") - if "ratio" in kwargs: - ratio = kwargs.get("ratio") - if "value" in kwargs: - value = kwargs.get("value") - if "inplace" in kwargs: - inplace = kwargs.get("inplace") - if "max_attempts" in kwargs: - max_attempts = kwargs.get("max_attempts") + [prob, scale, ratio, value, inplace, max_attempts], _ = parse_user_args(method, *args, **kwargs) - if prob is not None: - check_value(prob, [0., 1.]) - kwargs["prob"] = prob - if scale is not None: - 
check_range(scale, [0, FLOAT_MAX_INTEGER]) - kwargs["scale"] = scale - if ratio is not None: - check_range(ratio, [0, FLOAT_MAX_INTEGER]) - kwargs["ratio"] = ratio - if value is not None: - check_erasing_value(value) - kwargs["value"] = value - if inplace is not None: - check_bool(inplace) - kwargs["inplace"] = inplace - if max_attempts is not None: - check_pos_int32(max_attempts) - kwargs["max_attempts"] = max_attempts + check_value(prob, [0., 1.], "prob") + check_range(scale, [0, FLOAT_MAX_INTEGER]) + check_range(ratio, [0, FLOAT_MAX_INTEGER]) + check_erasing_value(value) + type_check(inplace, (bool,), "inplace") + check_value(max_attempts, (1, FLOAT_MAX_INTEGER)) - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -666,23 +409,12 @@ def check_cutout(method): @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 2 * [None])[:2] - length, num_patches = args - if "length" in kwargs: - length = kwargs.get("length") - if "num_patches" in kwargs: - num_patches = kwargs.get("num_patches") + [length, num_patches], _ = parse_user_args(method, *args, **kwargs) - if length is None: - raise ValueError("length") - check_pos_int32(length) - kwargs["length"] = length + check_value(length, (1, FLOAT_MAX_INTEGER)) + check_value(num_patches, (1, FLOAT_MAX_INTEGER)) - if num_patches is not None: - check_pos_int32(num_patches) - kwargs["num_patches"] = num_patches - - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -692,17 +424,9 @@ def check_linear_transform(method): @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 2 * [None])[:2] - transformation_matrix, mean_vector = args - if "transformation_matrix" in kwargs: - transformation_matrix = kwargs.get("transformation_matrix") - if "mean_vector" in kwargs: - mean_vector = kwargs.get("mean_vector") - - if transformation_matrix is None: - raise ValueError("transformation_matrix is not provided.") - if 
mean_vector is None: - raise ValueError("mean_vector is not provided.") + [transformation_matrix, mean_vector], _ = parse_user_args(method, *args, **kwargs) + type_check(transformation_matrix, (np.ndarray,), "transformation_matrix") + type_check(mean_vector, (np.ndarray,), "mean_vector") if transformation_matrix.shape[0] != transformation_matrix.shape[1]: raise ValueError("transformation_matrix should be a square matrix. " @@ -711,10 +435,7 @@ def check_linear_transform(method): raise ValueError("mean_vector length {0} should match either one dimension of the square" "transformation_matrix {1}.".format(mean_vector.shape[0], transformation_matrix.shape)) - kwargs["transformation_matrix"] = transformation_matrix - kwargs["mean_vector"] = mean_vector - - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -724,67 +445,40 @@ def check_random_affine(method): @wraps(method) def new_method(self, *args, **kwargs): - args = (list(args) + 6 * [None])[:6] - degrees, translate, scale, shear, resample, fill_value = args - if "degrees" in kwargs: - degrees = kwargs.get("degrees") - if "translate" in kwargs: - translate = kwargs.get("translate") - if "scale" in kwargs: - scale = kwargs.get("scale") - if "shear" in kwargs: - shear = kwargs.get("shear") - if "resample" in kwargs: - resample = kwargs.get("resample") - if "fill_value" in kwargs: - fill_value = kwargs.get("fill_value") - - if degrees is None: - raise ValueError("degrees is not provided.") - degrees = check_degrees(degrees) - kwargs["degrees"] = degrees + [degrees, translate, scale, shear, resample, fill_value], _ = parse_user_args(method, *args, **kwargs) + check_degrees(degrees) if translate is not None: - if isinstance(translate, (tuple, list)) and len(translate) == 2: - for t in translate: - if t < 0.0 or t > 1.0: - raise ValueError("translation values should be between 0 and 1") - else: + if type_check(translate, (list, tuple), "translate"): + translate_names = 
["translate_{0}".format(i) for i in range(len(translate))] + type_check_list(translate, (int, float), translate_names) + if len(translate) != 2: raise TypeError("translate should be a list or tuple of length 2.") - kwargs["translate"] = translate + for i, t in enumerate(translate): + check_value(t, [0.0, 1.0], "translate at {0}".format(i)) if scale is not None: - if isinstance(scale, (tuple, list)) and len(scale) == 2: - for s in scale: - if s <= 0: - raise ValueError("scale values should be positive") + type_check(scale, (tuple, list), "scale") + if len(scale) == 2: + for i, s in enumerate(scale): + check_positive(s, "scale[{}]".format(i)) else: raise TypeError("scale should be a list or tuple of length 2.") - kwargs["scale"] = scale if shear is not None: + type_check(shear, (numbers.Number, tuple, list), "shear") if isinstance(shear, numbers.Number): - if shear < 0: - raise ValueError("If shear is a single number, it must be positive.") - shear = (-1 * shear, shear) - elif isinstance(shear, (tuple, list)) and (len(shear) == 2 or len(shear) == 4): - # X-Axis shear with [min, max] - if len(shear) == 2: - shear = [shear[0], shear[1], 0., 0.] 
- elif len(shear) == 4: - shear = [s for s in shear] + check_positive(shear, "shear") else: - raise TypeError("shear should be a list or tuple and it must be of length 2 or 4.") - kwargs["shear"] = shear + if len(shear) not in (2, 4): + raise TypeError("shear must be of length 2 or 4.") + + type_check(resample, (Inter,), "resample") - if resample is not None: - check_inter_mode(resample) - kwargs["resample"] = resample if fill_value is not None: - fill_value = check_fill_value(fill_value) - kwargs["fill_value"] = fill_value + check_fill_value(fill_value) - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -794,24 +488,11 @@ def check_rescale(method): @wraps(method) def new_method(self, *args, **kwargs): - rescale, shift = (list(args) + 2 * [None])[:2] - if "rescale" in kwargs: - rescale = kwargs.get("rescale") - if "shift" in kwargs: - shift = kwargs.get("shift") - - if rescale is None: - raise ValueError("rescale is not provided.") + [rescale, shift], _ = parse_user_args(method, *args, **kwargs) check_pos_float32(rescale) - kwargs["rescale"] = rescale + type_check(shift, (numbers.Number,), "shift") - if shift is None: - raise ValueError("shift is not provided.") - if not isinstance(shift, numbers.Number): - raise TypeError("shift is not a number.") - kwargs["shift"] = shift - - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -821,33 +502,16 @@ def check_uniform_augment_cpp(method): @wraps(method) def new_method(self, *args, **kwargs): - operations, num_ops = (list(args) + 2 * [None])[:2] - if "operations" in kwargs: - operations = kwargs.get("operations") - else: - raise ValueError("operations list required") - if "num_ops" in kwargs: - num_ops = kwargs.get("num_ops") - else: - num_ops = 2 + [operations, num_ops], _ = parse_user_args(method, *args, **kwargs) + type_check(num_ops, (int,), "num_ops") + check_positive(num_ops, "num_ops") - if not isinstance(num_ops, int): - raise 
ValueError("Number of operations should be an integer.") - - if num_ops <= 0: - raise ValueError("num_ops should be greater than zero") if num_ops > len(operations): raise ValueError("num_ops is greater than operations list size") - if not isinstance(operations, list): - raise TypeError("operations is not a python list") - for op in operations: - if not isinstance(op, TensorOp): - raise ValueError("operations list only accepts C++ operations.") + tensor_ops = ["tensor_op_{0}".format(i) for i in range(len(operations))] + type_check_list(operations, (TensorOp,), tensor_ops) - kwargs["num_ops"] = num_ops - kwargs["operations"] = operations - - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -857,23 +521,11 @@ def check_bounding_box_augment_cpp(method): @wraps(method) def new_method(self, *args, **kwargs): - transform, ratio = (list(args) + 2 * [None])[:2] - if "transform" in kwargs: - transform = kwargs.get("transform") - if "ratio" in kwargs: - ratio = kwargs.get("ratio") - if not isinstance(ratio, float) and not isinstance(ratio, int): - raise ValueError("Ratio should be an int or float.") - if ratio is not None: - check_value(ratio, [0., 1.]) - kwargs["ratio"] = ratio - else: - ratio = 0.3 - if not isinstance(transform, TensorOp): - raise ValueError("Transform can only be a C++ operation.") - kwargs["transform"] = transform - kwargs["ratio"] = ratio - return method(self, **kwargs) + [transform, ratio], _ = parse_user_args(method, *args, **kwargs) + type_check(ratio, (float, int), "ratio") + check_value(ratio, [0., 1.], "ratio") + type_check(transform, (TensorOp,), "transform") + return method(self, *args, **kwargs) return new_method @@ -883,29 +535,22 @@ def check_uniform_augment_py(method): @wraps(method) def new_method(self, *args, **kwargs): - transforms, num_ops = (list(args) + 2 * [None])[:2] - if "transforms" in kwargs: - transforms = kwargs.get("transforms") - if transforms is None: - raise ValueError("transforms is 
not provided.") + [transforms, num_ops], _ = parse_user_args(method, *args, **kwargs) + type_check(transforms, (list,), "transforms") + if not transforms: raise ValueError("transforms list is empty.") - check_list(transforms) + for transform in transforms: if isinstance(transform, TensorOp): raise ValueError("transform list only accepts Python operations.") - kwargs["transforms"] = transforms - if "num_ops" in kwargs: - num_ops = kwargs.get("num_ops") - if num_ops is not None: - check_type(num_ops, int) - check_positive(num_ops) - if num_ops > len(transforms): - raise ValueError("num_ops cannot be greater than the length of transforms list.") - kwargs["num_ops"] = num_ops + type_check(num_ops, (int,), "num_ops") + check_positive(num_ops, "num_ops") + if num_ops > len(transforms): + raise ValueError("num_ops cannot be greater than the length of transforms list.") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -915,22 +560,16 @@ def check_positive_degrees(method): @wraps(method) def new_method(self, *args, **kwargs): - degrees = (list(args) + [None])[0] - if "degrees" in kwargs: - degrees = kwargs.get("degrees") + [degrees], _ = parse_user_args(method, *args, **kwargs) - if degrees is not None: - if isinstance(degrees, (list, tuple)): - if len(degrees) != 2: - raise ValueError("Degrees must be a sequence with length 2.") - if degrees[0] < 0: - raise ValueError("Degrees range must be non-negative.") - if degrees[0] > degrees[1]: - raise ValueError("Degrees should be in (min,max) format. Got (max,min).") - else: - raise TypeError("Degrees must be a sequence in (min,max) format.") + if isinstance(degrees, (list, tuple)): + if len(degrees) != 2: + raise ValueError("Degrees must be a sequence with length 2.") + check_positive(degrees[0], "degrees[0]") + if degrees[0] > degrees[1]: + raise ValueError("Degrees should be in (min,max) format. 
Got (max,min).") - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method @@ -940,18 +579,12 @@ def check_compose_list(method): @wraps(method) def new_method(self, *args, **kwargs): - transforms = (list(args) + [None])[0] - if "transforms" in kwargs: - transforms = kwargs.get("transforms") - if transforms is None: - raise ValueError("transforms is not provided.") + [transforms], _ = parse_user_args(method, *args, **kwargs) + + type_check(transforms, (list,), transforms) if not transforms: raise ValueError("transforms list is empty.") - if not isinstance(transforms, list): - raise TypeError("transforms is not a python list") - kwargs["transforms"] = transforms - - return method(self, **kwargs) + return method(self, *args, **kwargs) return new_method diff --git a/mindspore/model_zoo/__init__.py b/mindspore/model_zoo/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/mindspore/nn/__init__.py b/mindspore/nn/__init__.py index 8d5e7d3b0a1..e5c133a9a62 100644 --- a/mindspore/nn/__init__.py +++ b/mindspore/nn/__init__.py @@ -17,13 +17,15 @@ Neural Networks Cells. Pre-defined building blocks or computing units to construct Neural Networks. """ -from . import layer, loss, optim, metrics, wrap +from . 
import layer, loss, optim, metrics, wrap, distribution from .cell import Cell, GraphKernel from .layer import * from .loss import * from .optim import * from .metrics import * from .wrap import * +from .distribution import * + __all__ = ["Cell", "GraphKernel"] __all__.extend(layer.__all__) @@ -31,5 +33,7 @@ __all__.extend(loss.__all__) __all__.extend(optim.__all__) __all__.extend(metrics.__all__) __all__.extend(wrap.__all__) +__all__.extend(distribution.__all__) + __all__.sort() diff --git a/mindspore/nn/cell.py b/mindspore/nn/cell.py index cffe00a9201..3eec96f0b5f 100755 --- a/mindspore/nn/cell.py +++ b/mindspore/nn/cell.py @@ -16,6 +16,7 @@ import time import gc from collections import OrderedDict +import numpy from mindspore import log as logger from .. import context from ..common import dtype as mstype @@ -211,6 +212,9 @@ class Cell: if context.get_context("mode") == context.GRAPH_MODE: out = self.compile_and_run(*inputs) return out + for item in inputs: + if isinstance(item, numpy.ndarray): + raise TypeError("cell inputs should not be numpy array.") self.init_parameters_data() orign_grad = [] if self.requires_grad is True: @@ -827,6 +831,20 @@ class Cell: self._backward_hook = HookBackward(fn, self.cls_name + "(" + str(id(self)) + ")") self.enable_hook = True + def set_param_ps(self, recurse=True): + """ + Set whether the trainable parameter is updated by parameter server. + + Note: + This only works when running task in parameter server mode. + + Args: + recurse (bool): Whether sets the trainable parameters of subcells. Default: True. + """ + params = self.trainable_params(recurse) + for param in params: + param.set_param_ps() + class GraphKernel(Cell): """ Base class for GraphKernel. 
diff --git a/mindspore/nn/distribution/__init__.py b/mindspore/nn/distribution/__init__.py new file mode 100644 index 00000000000..55b4b03ef73 --- /dev/null +++ b/mindspore/nn/distribution/__init__.py @@ -0,0 +1,27 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +Distribution. + +The high-level components(Distributions) used to construct the probabilistic network. +""" + +from .distribution import Distribution +from .normal import Normal +from .bernoulli import Bernoulli + +__all__ = ['Distribution', + 'Normal', + 'Bernoulli',] diff --git a/mindspore/nn/distribution/_utils/__init__.py b/mindspore/nn/distribution/_utils/__init__.py new file mode 100644 index 00000000000..816485643a2 --- /dev/null +++ b/mindspore/nn/distribution/_utils/__init__.py @@ -0,0 +1,24 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +Distribution operation utility functions. +""" +from .utils import * + +__all__ = ['check_scalar', 'convert_to_batch', 'cast_to_tensor', + 'calc_batch_size', 'check_greater', + 'check_greater_equal_zero', + 'calc_broadcast_shape_from_param', + 'check_scalar_from_param', 'check_prob'] diff --git a/mindspore/nn/distribution/_utils/utils.py b/mindspore/nn/distribution/_utils/utils.py new file mode 100644 index 00000000000..c790a66f252 --- /dev/null +++ b/mindspore/nn/distribution/_utils/utils.py @@ -0,0 +1,199 @@ + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Utitly functions to help distribution class.""" +import numpy as np +from mindspore.ops import _utils as utils +from ....common.tensor import Tensor +from ....common.parameter import Parameter +from ....common import dtype as mstype + + +def check_scalar(value): + """ + Check if input value is a scalar. + """ + return np.isscalar(value) + + +def cast_to_tensor(t, dtype=mstype.float32): + """ + Cast an user input value into a Tensor of dtype. + + Args: + t (int, float, list, numpy.ndarray, Tensor, Parameter): object to be cast to Tensor. + dtype (mindspore.dtype): dtype of the Tensor. 
Default: mstype.float32. + + Raises: + RuntimeError: if t cannot be cast to Tensor. + + Returns: + Tensor. + """ + if isinstance(t, Parameter): + return t + if isinstance(t, Tensor): + #check if the Tensor in shape of Tensor(4) + if t.dim() == 0: + value = t.asnumpy() + return Tensor([t], dtype=dtype) + #convert the type of tensor to dtype + t.set_dtype(dtype) + return t + if isinstance(t, (list, np.ndarray)): + return Tensor(t, dtype=dtype) + if check_scalar(t): + return Tensor([t], dtype=dtype) + raise RuntimeError("Input type is not supported.") + +def calc_batch_size(batch_shape): + """ + Calculate the size of a given batch_shape. + + Args: + batch_shape (tuple): batch shape to be calculated. + + Returns: + int. + """ + return int(np.prod(batch_shape)) + +def convert_to_batch(t, batch_shape, dtype): + """ + Convert a Tensor to a given batch shape. + + Args: + t (Tensor, Parameter): Tensor to be converted. + batch_shape (tuple): desired batch shape. + dtype (mindspore.dtype): desired dtype. + + Raises: + RuntimeError: if the converison cannot be done. + + Returns: + Tensor, with shape of batch_shape. + """ + if isinstance(t, Parameter): + return t + t = cast_to_tensor(t, dtype) + if t.shape != batch_shape: + mul = calc_batch_size(batch_shape) // t.size() + if (calc_batch_size(batch_shape) % t.size()) != 0: + raise RuntimeError("Cannot cast the tensor to the given batch shape.") + temp = list(t.asnumpy()) * mul + temp = np.reshape(temp, batch_shape) + return Tensor(temp, dtype) + return t + +def check_scalar_from_param(params): + """ + Check if params are all scalars. + + Args: + params (dict): parameters used to initialize distribution. + + Notes: String parameters are excluded. + """ + for value in params.values(): + if isinstance(value, (str, type(params['dtype']))): + continue + elif check_scalar(value): + continue + else: + return False + return True + + +def calc_broadcast_shape_from_param(params): + """ + Calculate the broadcast shape from params. 
+ + Args: + params (dict): parameters used to initialize distribution. + + Returns: + tuple. + """ + broadcast_shape = [] + for value in params.values(): + if isinstance(value, (str, type(params['dtype']))): + continue + if value is None: + return None + if isinstance(value, Parameter): + value_t = value.default_input + else: + value_t = cast_to_tensor(value, params['dtype']) + broadcast_shape = utils.get_broadcast_shape(broadcast_shape, list(value_t.shape), params['name']) + return tuple(broadcast_shape) + +def check_greater_equal_zero(value, name): + """ + Check if the given Tensor is greater zero. + + Args: + value (Tensor, Parameter): value to be checked. + name (str) : name of the value. + + Raises: + ValueError: if the input value is less than zero. + + """ + if isinstance(value, Parameter): + if not isinstance(value.default_input, Tensor): + return + value = value.default_input + comp = np.less(value.asnumpy(), np.zeros(value.shape)) + if comp.any(): + raise ValueError(f'{name} should be greater than zero.') + +def check_greater(a, b, name_a, name_b): + """ + Check if Tensor b is strictly greater than Tensor a. + + Args: + a (Tensor): input tensor a. + b (Tensor): input tensor b. + name_a (str): name of Tensor_a. + name_b (str): name of Tensor_b. + + Raises: + ValueError: if b is less than or equal to a + """ + comp = np.less(a.asnumpy(), b.asnumpy()) + if not comp.all(): + raise ValueError(f'{name_a} should be less than {name_b}') + + +def check_prob(p): + """ + Check if p is a proper probability, i.e. 0 <= p <=1. + + Args: + p (Tensor, Parameter): value to be checked. + + Raises: + ValueError: if p is not a proper probability. 
+ """ + if isinstance(p, Parameter): + if not isinstance(p.default_input, Tensor): + return + p = p.default_input + comp = np.less(p.asnumpy(), np.zeros(p.shape)) + if comp.any(): + raise ValueError('Probabilities should be greater than or equal to zero') + comp = np.greater(p.asnumpy(), np.ones(p.shape)) + if comp.any(): + raise ValueError('Probabilities should be less than or equal to one') diff --git a/mindspore/nn/distribution/bernoulli.py b/mindspore/nn/distribution/bernoulli.py new file mode 100644 index 00000000000..9aa20d668fe --- /dev/null +++ b/mindspore/nn/distribution/bernoulli.py @@ -0,0 +1,168 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Bernoulli Distribution""" +from mindspore.ops import operations as P +from mindspore.ops import composite as C +from .distribution import Distribution +from ._utils.utils import cast_to_tensor, check_prob +from ...common import dtype as mstype + +class Bernoulli(Distribution): + """ + Example class: Bernoulli Distribution. + + Args: + probs (int, float, list, numpy.ndarray, Tensor, Parameter): probability of 1 as outcome. + seed (int): seed to use in sampling. Default: 0. + dtype (mindspore.dtype): type of the distribution. Default: mstype.int32. + name (str): name of the distribution. Default: Bernoulli. + + Note: + probs should be proper probabilities (0 <= p <= 1). 
+ + Examples: + >>> # To initialize a Bernoulli distribution which has equal probability of getting 1 and 0 + >>> b = nn.Bernoulli(0.5, dtype = mstype.int32) + >>> # The following create two independent Bernoulli distributions + >>> b = nn.Bernoulli([0.7, 0.2], dtype = mstype.int32) + """ + + def __init__(self, + probs=None, + seed=0, + dtype=mstype.int32, + name="Bernoulli"): + """ + Constructor of Bernoulli distribution. + """ + param = dict(locals()) + super(Bernoulli, self).__init__(dtype, name, param) + if probs is not None: + self._probs = cast_to_tensor(probs) + check_prob(self._probs) + else: + self._probs = probs + self.seed = seed + + # ops needed for the class + self.log = P.Log() + self.add = P.TensorAdd() + self.mul = P.Mul() + self.sqrt = P.Sqrt() + self.realdiv = P.RealDiv() + self.shape = P.Shape() + self.const = P.ScalarToArray() + self.less = P.Less() + self.cast = P.Cast() + self.erf = P.Erf() + self.sqrt = P.Sqrt() + + def extend_repr(self): + str_info = f'probs = {self._probs}' + return str_info + + def probs(self): + """ + Returns the probability for the outcome is 1. + """ + return self._probs + + def _mean(self, name='mean', probs1=None): + r""" + .. math:: + MEAN(B) = probs1 + """ + if name == 'mean': + return self._probs if probs1 is None else probs1 + return None + + def _var(self, name='var', probs1=None): + r""" + .. math:: + VAR(B) = probs1 * probs0 + """ + if name in ('sd', 'var'): + probs1 = self._probs if probs1 is None else probs1 + probs0 = self.add(1, -1 * probs1) + return self.mul(probs0, probs1) + return None + + def _prob(self, name, value, probs=None): + r""" + pmf of Bernoulli distribution. + + Args: + name (str): name of the function. Should be "prob" when passed in from construct. + value (Tensor): a Tensor composed of only zeros and ones. + probs (Tensor): probability of outcome is 1. Default: self._probs. + + .. 
math:: + pmf(k) = probs1 if k = 1; + pmf(k) = probs0 if k = 0; + """ + if name in ('prob', 'log_prob'): + probs1 = self._probs if probs is None else probs + probs0 = self.add(1, -1 * probs1) + return self.add(self.mul(probs1, value), + self.mul(probs0, self.add(1, -1 * value))) + return None + + def _kl_loss(self, name, dist, probs1_b, probs1_a=None): + r""" + Evaluate bernoulli-bernoulli kl divergence, i.e. KL(a||b). + + Args: + name (str): name of the funtion. Should always be "kl_loss" when passed in from construct. + dist (str): type of the distributions. Should be "Bernoulli" in this case. + probs1_b (Tensor): probs1 of distribution b. + probs1_a (Tensor): probs1 of distribution a. Default: self._probs. + + .. math:: + KL(a||b) = probs1_a * \log(\fract{probs1_a}{probs1_b}) + + probs0_a * \log(\fract{probs0_a}{probs0_b}) + """ + if name == 'kl_loss' and dist == 'Bernoulli': + probs1_a = self._probs if probs1_a is None else probs1_a + probs0_a = self.add(1, -1 * probs1_a) + probs0_b = self.add(1, -1 * probs1_b) + return self.add(probs1_a * self.log(self.realdiv(probs1_a, probs1_b)), + probs0_a * self.log(self.realdiv(probs0_a, probs0_b))) + return None + + def _sample(self, name, shape=(), probs=None): + """ + Sampling. + + Args: + name (str): name of the function. Should always be 'sample' when passed in from construct. + shape (tuple): shape of the sample. Default: (). + probs (Tensor): probs1 of the samples. Default: self._probs. + + Returns: + Tensor, shape is shape + batch_shape. 
+ """ + if name == 'sample': + probs1 = self._probs if probs is None else probs + batch_shape = self.shape(probs1) + sample_shape = shape + batch_shape + mean_zero = self.const(0.0) + sd_one = self.const(1.0) + sqrt_two = self.sqrt(self.const(2.0)) + sample_norm = C.normal(sample_shape, mean_zero, sd_one, self.seed) + sample_uniform = 0.5 * (1 + self.erf(self.realdiv(sample_norm, sqrt_two))) + sample = self.less(sample_uniform, probs1) + sample = self.cast(sample, self._dtype) + return sample + return None diff --git a/mindspore/nn/distribution/distribution.py b/mindspore/nn/distribution/distribution.py new file mode 100644 index 00000000000..1ed7906a9e6 --- /dev/null +++ b/mindspore/nn/distribution/distribution.py @@ -0,0 +1,200 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""basic""" +from ..cell import Cell +from ._utils.utils import calc_broadcast_shape_from_param + + +class Distribution(Cell): + """ + Base class for all mathematical distributions. + + Args: + dtype (mindspore.dtype): type of the distribution. + name (str): name of the distribution. + param (dict): parameters used to initialize the distribution. + + Note: + Derived class should override operations such as ,_mean, _prob, + and _log_prob. Functions should be called through construct when + used inside a network in the form of function name followed by + arguments. 
+ + Examples: + >>> class MyNormalDistribution(Distribution): + >>> def __init__(self): + >>> super(MyDistribution, self).__init__() + >>> self._mean_value = Tensor([2.0,3.0]) + >>> self._sd_value = Tensor([2.0,3.0]) + >>> + >>> def _mean(self): + >>> return self._mean_value + + """ + def __init__(self, + dtype, + name, + param): + + """ + Constructor of distribution class. + """ + super(Distribution, self).__init__() + self._name = name + self._dtype = dtype + self._parameters = {} + # parsing parameters + for k in param.keys(): + if not(k == 'self' or k.startswith('_')): + self._parameters[k] = param[k] + # some attributes + self._broadcast_shape = calc_broadcast_shape_from_param( + self._parameters) + + # set the function to call according to the derived class's attributes + self._set_prob() + self._set_log_prob() + self._set_sd() + + def _set_prob(self): + """ + Set probability funtion based on the availability of _prob and _log_likehood. + """ + if hasattr(self, '_prob'): + self._call_prob = self._prob + elif hasattr(self, '_log_likelihood'): + self._call_prob = self._calc_prob_from_log_likelihood + + def _set_sd(self): + """ + Set standard deviation based on the availability of _sd and _var. + """ + if hasattr(self, '_sd'): + self._call_sd = self._sd + elif hasattr(self, '_var'): + self._call_sd = self._calc_sd_from_var + + def _set_log_prob(self): + """ + Set log probability based on the availability of _prob and _log_likelihood. + """ + if hasattr(self, '_log_likelihood'): + self._call_log_prob = self._log_likelihood + if hasattr(self, '_prob'): + self._call_log_prob = self._calc_log_prob_from_prob + + def log_likelihood(self, *args): + """ + Evaluate the log probability at the given value. + + Note: + value is casted to Tensor for further calculation. + + Returns: + Tensor, shape is the broadcast_shape of the distribution. 
+ """ + return self._call_log_prob(*args) + + def _calc_prob_from_log_likelihood(self, *args): + r""" + Evaluate prob from log probability. + + .. math:: + probability(x) = \exp(log_likehood(x)) + """ + return self.exp(self._log_likelihood(*args)) + + def prob(self, *args): + """ + Evaluate the prob (pdf or pmf) at given value. + + Note: + value is casted to Tensor for further calculation. + + Returns: + Tensor, shape is the broadcast_shape of the distribution. + """ + return self._call_prob(*args) + + def _calc_log_prob_from_prob(self, *args): + r""" + Evaluate log probability from probability. + + .. math:: + log_prob(x) = \log(prob(x)) + """ + return self.log(self._prob(*args)) + + def kl_loss(self, **kwargs): + """ + Evaluate the KL divergence. Parameters of the second distribution should be + passed in through **kwargs. + + Returns: + Tensor, shape is the broadcast_shape of the distribution and input distribution. + """ + return self._kl_loss(**kwargs) + + def mean(self, **kwargs): + """ + Evaluate the mean. + + Returns: + Tensor, shape is the broadcast_shape of the distribution. + """ + return self._mean(**kwargs) + + def sd(self, **kwargs): + """ + Evaluate the standard deviation. + + Returns: + Tensor, shape is the broadcast_shape of the distribution. + """ + return self._call_sd(**kwargs) + + def _calc_sd_from_var(self, *args): + r""" + Evaluate log probability from probability. + + .. math:: + STD(x) = \sqrt(VAR(x)) + """ + return self.sqrt(self._var(*args)) + + def construct(self, *inputs): + """ + Override construct in Cell. + + Args: + *inputs: inputs[0] is always the name of the function. + + Notes: + Always raise RuntimeError as Distribution should not be called directly. 
+ """ + + if inputs[0] == 'log_prob': + return self._call_log_prob(*inputs) + if inputs[0] == 'prob': + return self._call_prob(*inputs) + if inputs[0] == 'kl_loss': + return self._kl_loss(*inputs) + if inputs[0] == 'mean': + return self._mean(*inputs) + if inputs[0] == 'sd': + return self._call_sd(*inputs) + if inputs[0] == 'sample': + return self._sample(*inputs) + return None diff --git a/mindspore/nn/distribution/normal.py b/mindspore/nn/distribution/normal.py new file mode 100644 index 00000000000..61cec6d8106 --- /dev/null +++ b/mindspore/nn/distribution/normal.py @@ -0,0 +1,170 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Normal Distribution""" +import numpy as np +from mindspore.ops import operations as P +from mindspore.ops import composite as C +from .distribution import Distribution +from ._utils.utils import convert_to_batch, check_greater_equal_zero +from ...common import dtype as mstype +from ...context import get_context + +class Normal(Distribution): + """ + Example class: Normal distribution. + + Args: + mean (int, float, list, numpy.ndarray, Tensor, Parameter): mean of the Gaussian distribution. + sd (int, float, list, numpy.ndarray, Tensor, Parameter): stddev of the Gaussian distribution. + seed (int): seed to use in sampling. Default: 0. + dtype (mindspore.dtype): type of the distribution. Default: mstype.float32. 
+ name (str): name of the distribution. Default: Normal. + + + Note: + Standard deviation should be greater than zero. + + Examples: + >>> # To initialize a normal distribution of mean 3.0 and standard deviation 4.0 + >>> n = nn.Normal(3.0, 4.0, dtype=mstype.float32) + >>> # The following create two independent normal distributions + >>> n = nn.Normal([3.0, 3.0], [4.0, 4.0], dtype=mstype.float32) + """ + + def __init__(self, + mean=None, + sd=None, + seed=0, + dtype=mstype.float32, + name="Normal"): + """ + Constructor of normal distribution. + """ + param = dict(locals()) + super(Normal, self).__init__(dtype, name, param) + if mean is not None and sd is not None: + self._mean_value = convert_to_batch(mean, self._broadcast_shape, dtype) + self._sd_value = convert_to_batch(sd, self._broadcast_shape, dtype) + check_greater_equal_zero(self._sd_value, "Standard deviation") + else: + self._mean_value = mean + self._sd_value = sd + self.seed = seed + + #ops needed for the class + self.exp = P.Exp() + self.add = P.TensorAdd() + self.mul = P.Mul() + self.sq = P.Square() + self.log = P.Log() + self.sqrt = P.Sqrt() + self.realdiv = P.RealDiv() + self.expm1 = P.Expm1() if get_context('device_target') == 'Ascend' else self._expm1_by_step + self.shape = P.Shape() + self.zeroslike = P.ZerosLike() + self.const = P.ScalarToArray() + + def extend_repr(self): + str_info = f'mean = {self._mean_value}, standard deviation = {self._sd_value}' + return str_info + + def _expm1_by_step(self, x): + """ + Expm1 ops under GPU context. + """ + return self.add(self.exp(x), -1) + + def _mean(self, name='mean', mean=None, sd=None): + """ + Mean of the distribution. + """ + if name == 'mean': + mean = self._mean_value if mean is None or sd is None else mean + return mean + return None + + def _sd(self, name='sd', mean=None, sd=None): + """ + Standard deviation of the distribution. 
+ """ + if name in ('sd', 'var'): + sd = self._sd_value if mean is None or sd is None else sd + return sd + return None + + def _log_likelihood(self, name, value, mean=None, sd=None): + r""" + Evaluate log probability. + + .. math:: + L(x) = -1* \fract{(x - \mu)^2}{2. * \sigma^2} - \log(\sqrt(2* \pi * \sigma^2)) + """ + if name in ('prob', 'log_prob'): + mean = self._mean_value if mean is None else mean + sd = self._sd_value if sd is None else sd + unnormalized_log_prob = -1. * self.realdiv(self.sq(self.add(value, -1. * mean)), + 2. * self.sq(sd)) + neg_normalization = -1. * self.log(self.sqrt(2. * np.pi * self.sq(sd))) + return self.add(unnormalized_log_prob, neg_normalization) + return None + + def _kl_loss(self, name, dist, mean_b, sd_b, mean_a=None, sd_a=None): + r""" + Evaluate Normal-Normal kl divergence, i.e. KL(a||b). + + Args: + name (str): name of the funtion passed in from construct. Should always be "kl_loss". + dist (str): type of the distributions. Should be "Normal" in this case. + mean_b (Tensor): mean of distribution b. + sd_b (Tensor): standard deviation distribution b. + mean_a (Tensor): mean of distribution a. Default: self._mean_value. + sd_a (Tensor): standard deviation distribution a. Default: self._sd_value. + + .. math:: + KL(a||b) = 0.5 * (\fract{MEAN(a)}{STD(b)} - \fract{MEAN(b)}{STD(b)}) ^ 2 + + 0.5 * EXPM1(2 * (\log(STD(a)) - \log(STD(b))) - (\log(STD(a)) - \log(STD(b))) + """ + if name == 'kl_loss' and dist == 'Normal': + mean_a = self._mean_value if mean_a is None else mean_a + sd_a = self._sd_value if sd_a is None else sd_a + diff_log_scale = self.add(self.log(sd_a), - self.log(sd_b)) + squared_diff = self.sq(self.add(self.realdiv(mean_a, sd_b), - self.realdiv(mean_b, sd_b))) + return self.add(self.add(0.5 * squared_diff, 0.5 * self.expm1(2 * diff_log_scale)), - diff_log_scale) + return None + + def _sample(self, name, shape=(), mean=None, sd=None): + """ + Sampling. + + Args: + name (str): name of the function. 
Should always be 'sample' when passed in from construct. + shape (tuple): shape of the sample. Default: (). + mean (Tensor): mean of the samples. Default: self._mean_value. + sd (Tensor): standard deviation of the samples. Default: self._sd_value. + + Returns: + Tensor, shape is shape + batch_shape. + """ + if name == 'sample': + mean = self._mean_value if mean is None else mean + sd = self._sd_value if sd is None else sd + batch_shape = self.shape(self.add(self.zeroslike(mean), self.zeroslike(sd))) + sample_shape = shape + batch_shape + mean_zero = self.const(0.0) + sd_one = self.const(1.0) + sample_norm = C.normal(sample_shape, mean_zero, sd_one, self.seed) + sample = self.add(mean, self.mul(sample_norm, sd)) + return sample + return None diff --git a/mindspore/nn/layer/activation.py b/mindspore/nn/layer/activation.py index 14a1aa85540..384f6251338 100644 --- a/mindspore/nn/layer/activation.py +++ b/mindspore/nn/layer/activation.py @@ -530,6 +530,7 @@ _activation = { 'relu6': ReLU6, 'tanh': Tanh, 'gelu': GELU, + 'elu': ELU, 'sigmoid': Sigmoid, 'prelu': PReLU, 'leakyrelu': LeakyReLU, diff --git a/mindspore/nn/layer/container.py b/mindspore/nn/layer/container.py index 48871401bf7..ed36a1dd5ff 100644 --- a/mindspore/nn/layer/container.py +++ b/mindspore/nn/layer/container.py @@ -69,7 +69,7 @@ class SequentialCell(Cell): Alternatively, an ordered dict of cells can also be passed in. Args: - args (list, optional): List of subclass of Cell. + args (list, OrderedDict): List of subclass of Cell. Raises: TypeError: If arg is not of type list or OrderedDict. 
diff --git a/mindspore/nn/layer/embedding.py b/mindspore/nn/layer/embedding.py index c8873039ab7..3c4245d7020 100755 --- a/mindspore/nn/layer/embedding.py +++ b/mindspore/nn/layer/embedding.py @@ -21,7 +21,7 @@ from mindspore.common.initializer import initializer from ..cell import Cell from ..._checkparam import Validator as validator -__all__ = ['Embedding'] +__all__ = ['Embedding', 'EmbeddingLookup'] class Embedding(Cell): r""" @@ -105,3 +105,49 @@ class Embedding(Cell): self.embedding_table, self.dtype) return s + +class EmbeddingLookup(Cell): + r""" + Returns a slice of input tensor based on the specified indices. + + Note: + When 'target' is set to 'CPU', this module will use + P.EmbeddingLookup().add_prim_attr('primitive_target', 'CPU') which + specified 'offset = 0' to lookup table. + when 'target' is set to 'DEVICE', this module will use P.GatherV2() which + specified 'axis = 0' to lookup table. + + Args: + target (str): Specify the target where the op is executed. Default: 'CPU'. + + Inputs: + - **input_params** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. + The Tensor slice, instead of the entire Tensor. + - **input_indices** (Tensor) - The shape of tensor is :math:`(y_1, y_2, ..., y_S)`. + Specifies the indices of elements of the original Tensor. Values can be out of range of `input_params`, + and the exceeding part will be filled with 0 in the output. + + Outputs: + Tensor, the shape of tensor is :math:`(z_1, z_2, ..., z_N)`. 
+ + Examples: + >>> input_params = Tensor(np.array([[8, 9], [10, 11], [12, 13], [14, 15]]), mindspore.float32) + >>> input_indices = Tensor(np.array([[1, 0], [3, 2]]), mindspore.int32) + >>> out = nn.EmbeddingLookup()(input_params, input_indices) + [[[10, 11], [8 ,9]], [[14, 15], [12, 13]]] + """ + def __init__(self, target='CPU'): + super(EmbeddingLookup, self).__init__() + self.target = target + if target not in ('CPU', 'DEVICE'): + raise ValueError('Attr \'target\' of \'EmbeddingLookup\' Op passed ' + + str(target) + ', should be one of values in \'CPU\', \'DEVICE\'.') + self.gatherv2 = P.GatherV2() + self.embeddinglookup = P.EmbeddingLookup().add_prim_attr('primitive_target', 'CPU') + + def construct(self, params, indices): + if self.target == "CPU": + out = self.embeddinglookup(params, indices, 0) + else: + out = self.gatherv2(params, indices, 0) + return out diff --git a/mindspore/nn/layer/image.py b/mindspore/nn/layer/image.py index 3721bc3c44e..63ae7a94ace 100644 --- a/mindspore/nn/layer/image.py +++ b/mindspore/nn/layer/image.py @@ -21,9 +21,13 @@ from mindspore.ops import functional as F from mindspore.ops.primitive import constexpr from mindspore._checkparam import Validator as validator from mindspore._checkparam import Rel +from .conv import Conv2d +from .container import CellList +from .pooling import AvgPool2d +from .activation import ReLU from ..cell import Cell -__all__ = ['ImageGradients', 'SSIM', 'PSNR', 'CentralCrop'] +__all__ = ['ImageGradients', 'SSIM', 'MSSSIM', 'PSNR', 'CentralCrop'] class ImageGradients(Cell): r""" @@ -83,21 +87,6 @@ def _convert_img_dtype_to_float32(img, max_val): ret = ret * scale return ret - -@constexpr -def _gauss_kernel_helper(filter_size): - """gauss kernel helper""" - filter_size = F.scalar_cast(filter_size, mstype.int32) - coords = () - for i in range(filter_size): - i_cast = F.scalar_cast(i, mstype.float32) - offset = F.scalar_cast(filter_size-1, mstype.float32)/2.0 - element = i_cast-offset - coords = 
coords+(element,) - g = np.square(coords).astype(np.float32) - g = Tensor(g) - return filter_size, g - @constexpr def _check_input_4d(input_shape, param_name, func_name): if len(input_shape) != 4: @@ -110,9 +99,65 @@ def _check_input_filter_size(input_shape, param_name, filter_size, func_name): validator.check(param_name + " shape[2]", input_shape[2], "filter_size", filter_size, Rel.GE, func_name) validator.check(param_name + " shape[3]", input_shape[3], "filter_size", filter_size, Rel.GE, func_name) -@constexpr -def _check_input_dtype(input_dtype, param_name, allow_dtypes, cls_name): - validator.check_type_name(param_name, input_dtype, allow_dtypes, cls_name) +def _conv2d(in_channels, out_channels, kernel_size, weight, stride=1, padding=0): + return Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, + weight_init=weight, padding=padding, pad_mode="valid") + +def _create_window(size, sigma): + x_data, y_data = np.mgrid[-size // 2 + 1:size // 2 + 1, -size // 2 + 1:size // 2 + 1] + x_data = np.expand_dims(x_data, axis=-1).astype(np.float32) + x_data = np.expand_dims(x_data, axis=-1) ** 2 + y_data = np.expand_dims(y_data, axis=-1).astype(np.float32) + y_data = np.expand_dims(y_data, axis=-1) ** 2 + sigma = 2 * sigma ** 2 + g = np.exp(-(x_data + y_data) / sigma) + return np.transpose(g / np.sum(g), (2, 3, 0, 1)) + +def _split_img(x): + _, c, _, _ = F.shape(x) + img_split = P.Split(1, c) + output = img_split(x) + return output, c + +def _compute_per_channel_loss(c1, c2, img1, img2, conv): + """computes ssim index between img1 and img2 per single channel""" + dot_img = img1 * img2 + mu1 = conv(img1) + mu2 = conv(img2) + mu1_sq = mu1 * mu1 + mu2_sq = mu2 * mu2 + mu1_mu2 = mu1 * mu2 + sigma1_tmp = conv(img1 * img1) + sigma1_sq = sigma1_tmp - mu1_sq + sigma2_tmp = conv(img2 * img2) + sigma2_sq = sigma2_tmp - mu2_sq + sigma12_tmp = conv(dot_img) + sigma12 = sigma12_tmp - mu1_mu2 + a = (2 * mu1_mu2 + c1) + b = (mu1_sq + mu2_sq + c1) + v1 = 2 * sigma12 + 
c2 + v2 = sigma1_sq + sigma2_sq + c2 + ssim = (a * v1) / (b * v2) + cs = v1 / v2 + return ssim, cs + +def _compute_multi_channel_loss(c1, c2, img1, img2, conv, concat, mean): + """computes ssim index between img1 and img2 per color channel""" + split_img1, c = _split_img(img1) + split_img2, _ = _split_img(img2) + multi_ssim = () + multi_cs = () + for i in range(c): + ssim_per_channel, cs_per_channel = _compute_per_channel_loss(c1, c2, split_img1[i], split_img2[i], conv) + multi_ssim += (ssim_per_channel,) + multi_cs += (cs_per_channel,) + + multi_ssim = concat(multi_ssim) + multi_cs = concat(multi_cs) + + ssim = mean(multi_ssim, (2, 3)) + cs = mean(multi_cs, (2, 3)) + return ssim, cs class SSIM(Cell): r""" @@ -157,67 +202,126 @@ class SSIM(Cell): self.max_val = max_val self.filter_size = validator.check_integer('filter_size', filter_size, 1, Rel.GE, self.cls_name) self.filter_sigma = validator.check_float_positive('filter_sigma', filter_sigma, self.cls_name) - validator.check_value_type('k1', k1, [float], self.cls_name) - self.k1 = validator.check_number_range('k1', k1, 0.0, 1.0, Rel.INC_NEITHER, self.cls_name) - validator.check_value_type('k2', k2, [float], self.cls_name) - self.k2 = validator.check_number_range('k2', k2, 0.0, 1.0, Rel.INC_NEITHER, self.cls_name) - self.mean = P.DepthwiseConv2dNative(channel_multiplier=1, kernel_size=filter_size) + self.k1 = validator.check_value_type('k1', k1, [float], self.cls_name) + self.k2 = validator.check_value_type('k2', k2, [float], self.cls_name) + window = _create_window(filter_size, filter_sigma) + self.conv = _conv2d(1, 1, filter_size, Tensor(window)) + self.conv.weight.requires_grad = False + self.reduce_mean = P.ReduceMean() + self.concat = P.Concat(axis=1) def construct(self, img1, img2): - _check_input_dtype(F.dtype(img1), "img1", [mstype.float32, mstype.float16], self.cls_name) _check_input_filter_size(F.shape(img1), "img1", self.filter_size, self.cls_name) P.SameTypeShape()(img1, img2) max_val = 
_convert_img_dtype_to_float32(self.max_val, self.max_val) img1 = _convert_img_dtype_to_float32(img1, self.max_val) img2 = _convert_img_dtype_to_float32(img2, self.max_val) - kernel = self._fspecial_gauss(self.filter_size, self.filter_sigma) - kernel = P.Tile()(kernel, (1, P.Shape()(img1)[1], 1, 1)) + c1 = (self.k1 * max_val) ** 2 + c2 = (self.k2 * max_val) ** 2 - mean_ssim = self._calculate_mean_ssim(img1, img2, kernel, max_val, self.k1, self.k2) + ssim_ave_channel, _ = _compute_multi_channel_loss(c1, c2, img1, img2, self.conv, self.concat, self.reduce_mean) + loss = self.reduce_mean(ssim_ave_channel, -1) - return mean_ssim + return loss - def _calculate_mean_ssim(self, x, y, kernel, max_val, k1, k2): - """calculate mean ssim""" - c1 = (k1 * max_val) * (k1 * max_val) - c2 = (k2 * max_val) * (k2 * max_val) +def _downsample(img1, img2, op): + a = op(img1) + b = op(img2) + return a, b - # SSIM luminance formula - # (2 * mean_{x} * mean_{y} + c1) / (mean_{x}**2 + mean_{y}**2 + c1) - mean_x = self.mean(x, kernel) - mean_y = self.mean(y, kernel) - square_sum = F.square(mean_x)+F.square(mean_y) - luminance = (2*mean_x*mean_y+c1)/(square_sum+c1) +class MSSSIM(Cell): + r""" + Returns MS-SSIM index between img1 and img2. - # SSIM contrast*structure formula (when c3 = c2/2) - # (2 * conv_{xy} + c2) / (conv_{xx} + conv_{yy} + c2), equals to - # (2 * (mean_{xy} - mean_{x}*mean_{y}) + c2) / (mean_{xx}-mean_{x}**2 + mean_{yy}-mean_{y}**2 + c2) - mean_xy = self.mean(x*y, kernel) - mean_square_add = self.mean(F.square(x)+F.square(y), kernel) + Its implementation is based on Wang, Zhou, Eero P. Simoncelli, and Alan C. Bovik. `Multiscale structural similarity + for image quality assessment `_. + Signals, Systems and Computers, 2004. - cs = (2*(mean_xy-mean_x*mean_y)+c2)/(mean_square_add-square_sum+c2) + .. 
math:: - # SSIM formula - # luminance * cs - ssim = luminance*cs + l(x,y)&=\frac{2\mu_x\mu_y+C_1}{\mu_x^2+\mu_y^2+C_1}, C_1=(K_1L)^2.\\ + c(x,y)&=\frac{2\sigma_x\sigma_y+C_2}{\sigma_x^2+\sigma_y^2+C_2}, C_2=(K_2L)^2.\\ + s(x,y)&=\frac{\sigma_{xy}+C_3}{\sigma_x\sigma_y+C_3}, C_3=C_2/2.\\ + MSSSIM(x,y)&=l^alpha_M*{\prod_{1\leq j\leq M} (c^beta_j*s^gamma_j)}. - mean_ssim = P.ReduceMean()(ssim, (-3, -2, -1)) + Args: + max_val (Union[int, float]): The dynamic range of the pixel values (255 for 8-bit grayscale images). + Default: 1.0. + power_factors (Union[tuple, list]): Iterable of weights for each of the scales. + Default: (0.0448, 0.2856, 0.3001, 0.2363, 0.1333). Default values obtained by Wang et al. + filter_size (int): The size of the Gaussian filter. Default: 11. + filter_sigma (float): The standard deviation of Gaussian kernel. Default: 1.5. + k1 (float): The constant used to generate c1 in the luminance comparison function. Default: 0.01. + k2 (float): The constant used to generate c2 in the contrast comparison function. Default: 0.03. - return mean_ssim + Inputs: + - **img1** (Tensor) - The first image batch with format 'NCHW'. It should be the same shape and dtype as img2. + - **img2** (Tensor) - The second image batch with format 'NCHW'. It should be the same shape and dtype as img1. - def _fspecial_gauss(self, filter_size, filter_sigma): - """get gauss kernel""" - filter_size, g = _gauss_kernel_helper(filter_size) + Outputs: + Tensor, has the same dtype as img1. It is a 1-D tensor with shape N, where N is the batch num of img1. 
- square_sigma_scale = -0.5/(filter_sigma * filter_sigma) - g = g*square_sigma_scale - g = F.reshape(g, (1, -1))+F.reshape(g, (-1, 1)) - g = F.reshape(g, (1, -1)) - g = P.Softmax()(g) - ret = F.reshape(g, (1, 1, filter_size, filter_size)) - return ret + Examples: + >>> net = nn.MSSSIM(power_factors=(0.033, 0.033, 0.033)) + >>> img1 = Tensor(np.random.random((1,3,128,128))) + >>> img2 = Tensor(np.random.random((1,3,128,128))) + >>> msssim = net(img1, img2) + """ + def __init__(self, max_val=1.0, power_factors=(0.0448, 0.2856, 0.3001, 0.2363, 0.1333), filter_size=11, + filter_sigma=1.5, k1=0.01, k2=0.03): + super(MSSSIM, self).__init__() + validator.check_value_type('max_val', max_val, [int, float], self.cls_name) + validator.check_number('max_val', max_val, 0.0, Rel.GT, self.cls_name) + self.max_val = max_val + validator.check_value_type('power_factors', power_factors, [tuple, list], self.cls_name) + self.filter_size = validator.check_integer('filter_size', filter_size, 1, Rel.GE, self.cls_name) + self.filter_sigma = validator.check_float_positive('filter_sigma', filter_sigma, self.cls_name) + self.k1 = validator.check_value_type('k1', k1, [float], self.cls_name) + self.k2 = validator.check_value_type('k2', k2, [float], self.cls_name) + window = _create_window(filter_size, filter_sigma) + self.level = len(power_factors) + self.conv = [] + for i in range(self.level): + self.conv.append(_conv2d(1, 1, filter_size, Tensor(window))) + self.conv[i].weight.requires_grad = False + self.multi_convs_list = CellList(self.conv) + self.weight_tensor = Tensor(power_factors, mstype.float32) + self.avg_pool = AvgPool2d(kernel_size=2, stride=2, pad_mode='valid') + self.relu = ReLU() + self.reduce_mean = P.ReduceMean() + self.prod = P.ReduceProd() + self.pow = P.Pow() + self.pack = P.Pack(axis=-1) + self.concat = P.Concat(axis=1) + def construct(self, img1, img2): + _check_input_4d(F.shape(img1), "img1", self.cls_name) + _check_input_4d(F.shape(img2), "img2", self.cls_name) + 
P.SameTypeShape()(img1, img2) + max_val = _convert_img_dtype_to_float32(self.max_val, self.max_val) + img1 = _convert_img_dtype_to_float32(img1, self.max_val) + img2 = _convert_img_dtype_to_float32(img2, self.max_val) + + c1 = (self.k1 * max_val) ** 2 + c2 = (self.k2 * max_val) ** 2 + + sim = () + mcs = () + + for i in range(self.level): + sim, cs = _compute_multi_channel_loss(c1, c2, img1, img2, + self.multi_convs_list[i], self.concat, self.reduce_mean) + mcs += (self.relu(cs),) + img1, img2 = _downsample(img1, img2, self.avg_pool) + + mcs = mcs[0:-1:1] + mcs_and_ssim = self.pack(mcs + (self.relu(sim),)) + mcs_and_ssim = self.pow(mcs_and_ssim, self.weight_tensor) + ms_ssim = self.prod(mcs_and_ssim, -1) + loss = self.reduce_mean(ms_ssim, -1) + + return loss class PSNR(Cell): r""" diff --git a/mindspore/nn/layer/math.py b/mindspore/nn/layer/math.py index 1ecb20056e2..ddcaf2da6b9 100644 --- a/mindspore/nn/layer/math.py +++ b/mindspore/nn/layer/math.py @@ -55,7 +55,7 @@ class ReduceLogSumExp(Cell): Examples: >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) - >>> op = P.ReduceLogSumExp(keep_dims=True) + >>> op = nn.ReduceLogSumExp(keep_dims=True) >>> output = op(input_x, 1) """ @@ -132,23 +132,19 @@ class Range(Cell): class LinSpace(Cell): r""" - Generates values in an interval. And return the corresponding interpolation accroding to assist. + Generates values in an interval. Args: - - **start** (Union[int, float]) - The start of interval, With shape of 0-D. - - **stop** (Union[int, float]) - The end of interval, With shape of 0-D. - - **num** (int) - ticks number in the interval, the ticks include start and stop value. - With shape of 0-D. + start (Union[int, float]): The start of interval. With shape of 0-D. + stop (Union[int, float]): The end of interval. With shape of 0-D. + num (int): ticks number in the interval, the ticks include start and stop value. With shape of 0-D. Outputs: Tensor, With type same as `start`. 
The shape is 1-D with length of `num`. Examples: - >>> linspace = nn.LinSpace() - >>> start = Tensor(1, mindspore.float32) - >>> stop = Tensor(10, mindspore.float32) - >>> num = Tensor(5, mindspore.int32) - >>> output = linspace(start, stop, num) + >>> linspace = nn.LinSpace(1, 10, 5) + >>> output = linspace() [1, 3.25, 5.5, 7.75, 10] """ diff --git a/mindspore/nn/layer/normalization.py b/mindspore/nn/layer/normalization.py index d6c920b6206..05e5e54b96b 100644 --- a/mindspore/nn/layer/normalization.py +++ b/mindspore/nn/layer/normalization.py @@ -84,13 +84,14 @@ class _BatchNorm(Cell): self.dtype = P.DType() self.reshape = P.Reshape() self.is_ascend = context.get_context("device_target") == "Ascend" + self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE self.momentum = 1.0 - momentum if context.get_context("enable_ge"): self.is_ge_backend = True else: self.is_ge_backend = False - if self.is_ge_backend or self.is_ascend: + if self.is_graph_mode and (self.is_ge_backend or self.is_ascend): self.bn_train = P.BatchNorm(is_training=True, epsilon=self.eps) else: @@ -152,7 +153,7 @@ class _BatchNorm(Cell): if self.is_ge_backend and self.is_global: axes, re_shape = _shape_infer(F.shape(x), self.num_features) y = self._global_sync(x, axes, re_shape) - elif self.is_ge_backend or self.is_ascend: + elif self.is_graph_mode and (self.is_ge_backend or self.is_ascend): if self.is_global: axes, re_shape = _shape_infer(F.shape(x), self.num_features) y = self._global_sync(x, axes, re_shape) @@ -587,7 +588,7 @@ class GroupNorm(Cell): """calculate groupnorm output""" batch, channel, height, width = self.shape(x) _channel_check(channel, self.num_channels) - x = self.reshape(x, (batch, self.num_groups, channel*height*width/self.num_groups)) + x = self.reshape(x, (batch, self.num_groups, -1)) mean = self.reduce_mean(x, 2) var = self.reduce_sum(self.square(x - mean), 2) / (channel * height * width / self.num_groups - 1) std = self.sqrt(var + self.eps) diff --git 
a/mindspore/nn/layer/quant.py b/mindspore/nn/layer/quant.py index f0c82937c57..63cdedbfe94 100644 --- a/mindspore/nn/layer/quant.py +++ b/mindspore/nn/layer/quant.py @@ -17,6 +17,7 @@ from functools import partial import numpy as np +from mindspore import nn import mindspore.common.dtype as mstype from mindspore.ops import operations as P from mindspore.ops import functional as F @@ -41,8 +42,7 @@ __all__ = [ 'Conv2dBatchNormQuant', 'Conv2dQuant', 'DenseQuant', - 'ReLUQuant', - 'ReLU6Quant', + 'ActQuant', 'HSwishQuant', 'HSigmoidQuant', 'TensorAddQuant', @@ -375,9 +375,10 @@ class FakeQuantWithMinMax(Cell): def extend_repr(self): s = 'num_bits={}, symmetric={}, narrow_range={}, ema={}({}), per_channel={}({}, {}), ' \ - 'quant_delay={}, min_init={}, max_init={}'.format( - self.num_bits, self.symmetric, self.narrow_range, self.ema, self.ema_decay, self.per_channel, - self.channel_axis, self.num_channels, self.quant_delay, self.min_init, self.max_init) + 'quant_delay={}, min_init={}, max_init={}'.format(self.num_bits, self.symmetric, self.narrow_range, + self.ema, self.ema_decay, self.per_channel, + self.channel_axis, self.num_channels, self.quant_delay, + self.min_init, self.max_init) return s def construct(self, x): @@ -540,10 +541,12 @@ class Conv2dBatchNormQuant(Cell): def extend_repr(self): s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, ' \ 'pad_mode={}, padding={}, dilation={}, group={}, ' \ - 'fake={}, freeze_bn={}, momentum={}, quant_delay={}'.format( - self.in_channels, self.out_channels, self.kernel_size, self.stride, - self.pad_mode, self.padding, self.dilation, self.group, - self.fake, self.freeze_bn, self.momentum, self.quant_delay) + 'fake={}, freeze_bn={}, momentum={}, quant_delay={}'.format(self.in_channels, self.out_channels, + self.kernel_size, self.stride, + self.pad_mode, self.padding, self.dilation, + self.group, + self.fake, self.freeze_bn, self.momentum, + self.quant_delay) return s def construct(self, x): @@ -685,10 +688,9 @@ 
class Conv2dQuant(Cell): def extend_repr(self): s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, ' \ 'pad_mode={}, padding={}, dilation={}, group={}, ' \ - 'has_bias={}, quant_delay={}'.format( - self.in_channels, self.out_channels, self.kernel_size, self.stride, - self.pad_mode, self.padding, self.dilation, self.group, - self.has_bias, self.quant_delay) + 'has_bias={}, quant_delay={}'.format(self.in_channels, self.out_channels, self.kernel_size, self.stride, + self.pad_mode, self.padding, self.dilation, self.group, + self.has_bias, self.quant_delay) return s @@ -799,76 +801,23 @@ class DenseQuant(Cell): class _QuantActivation(Cell): r""" - Base class for Quant activation function. Add Fake Quant OP after activation OP. + Base class for quantization aware training activation function. Add Fake Quant OP after activation OP. """ def get_origin(self): raise NotImplementedError -class ReLUQuant(_QuantActivation): +class ActQuant(_QuantActivation): r""" - ReLUQuant activation function. Add Fake Quant OP after Relu OP. + Quantization aware training activation function. - For a more Detailed overview of ReLU op. - - Args: - ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. - per_channel (bool): Quantization granularity based on layer or on channel. Default: False. - num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. - symmetric (bool): Quantization algorithm use symmetric or not. Default: False. - narrow_range (bool): Quantization algorithm use narrow range or not. Default: False. - quant_delay (int): Quantization delay parameters according by global step. Default: 0. - - Inputs: - - **x** (Tensor) - The input of ReLUQuant. - - Outputs: - Tensor, with the same type and shape as the `x`. 
- - Examples: - >>> relu_quant = nn.ReLUQuant() - >>> input_x = Tensor(np.array([[1, 2, 0], [-1, -2, 1]]), mindspore.float32) - >>> result = relu_quant(input_x) - """ - - def __init__(self, - ema_decay=0.999, - per_channel=False, - num_bits=8, - symmetric=False, - narrow_range=False, - quant_delay=0): - super(ReLUQuant, self).__init__() - self.fake_quant_act = FakeQuantWithMinMax(min_init=0, - max_init=6, - ema=True, - ema_decay=ema_decay, - per_channel=per_channel, - num_bits=num_bits, - symmetric=symmetric, - narrow_range=narrow_range, - quant_delay=quant_delay) - self.relu = P.ReLU() - - def construct(self, x): - x = self.relu(x) - x = self.fake_quant_act(x) - return x - - def get_origin(self): - return self.relu - - -class ReLU6Quant(_QuantActivation): - r""" - ReLU6Quant activation function. - - Add Fake Quant OP after Relu6. Not Recommand to used these cell for Fake Quant Op + Add Fake Quant OP after activation. Not recommended to use this cell for Fake Quant Op Will climp the max range of the activation and the relu6 do the same operation. For a more Detailed overview of ReLU6 op. Args: + activation (Cell): Activation cell class. ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. per_channel (bool): Quantization granularity based on layer or on channel. Default: False. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. @@ -883,19 +832,20 @@ class ReLU6Quant(_QuantActivation): Tensor, with the same type and shape as the `x`. 
Examples: - >>> relu6_quant = nn.ReLU6Quant(4, 1) + >>> act_quant = nn.ActQuant(4, 1) >>> input_x = Tensor(np.array([[1, 2, -1], [-2, 0, -1]]), mindspore.float32) - >>> result = relu6_quant(input_x) + >>> result = act_quant(input_x) """ def __init__(self, + activation, ema_decay=0.999, per_channel=False, num_bits=8, symmetric=False, narrow_range=False, quant_delay=0): - super(ReLU6Quant, self).__init__() + super(ActQuant, self).__init__() self.fake_quant_act = FakeQuantWithMinMax(min_init=0, max_init=6, ema=True, @@ -905,15 +855,15 @@ class ReLU6Quant(_QuantActivation): symmetric=symmetric, narrow_range=narrow_range, quant_delay=quant_delay) - self.relu6 = P.ReLU6() + self.act = activation() def construct(self, x): - x = self.relu6(x) + x = self.act(x) x = self.fake_quant_act(x) return x def get_origin(self): - return self.relu6 + return self.act class HSwishQuant(_QuantActivation): @@ -923,6 +873,7 @@ class HSwishQuant(_QuantActivation): For a more Detailed overview of HSwish op. Args: + activation (Cell): Activation cell class. ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. per_channel (bool): Quantization granularity based on layer or on channel. Default: False. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. @@ -943,6 +894,7 @@ class HSwishQuant(_QuantActivation): """ def __init__(self, + activation, ema_decay=0.999, per_channel=False, num_bits=8, @@ -968,7 +920,10 @@ class HSwishQuant(_QuantActivation): symmetric=symmetric, narrow_range=narrow_range, quant_delay=quant_delay) - self.act = P.HSwish() + if issubclass(activation, nn.HSwish): + self.act = activation() + else: + raise ValueError("Activation should be `nn.HSwish`") def construct(self, x): x = self.fake_quant_act_before(x) @@ -987,6 +942,7 @@ class HSigmoidQuant(_QuantActivation): For a more Detailed overview of HSigmoid op. Args: + activation (Cell): Activation cell class. ema_decay (float): Exponential Moving Average algorithm parameter. 
Default: 0.999. per_channel (bool): Quantization granularity based on layer or on channel. Default: False. num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. @@ -1007,6 +963,7 @@ class HSigmoidQuant(_QuantActivation): def __init__(self, + activation, ema_decay=0.999, per_channel=False, num_bits=8, @@ -1032,7 +989,10 @@ symmetric=symmetric, narrow_range=narrow_range, quant_delay=quant_delay) - self.act = P.HSigmoid() + if issubclass(activation, nn.HSigmoid): + self.act = activation() + else: + raise ValueError("Activation should be `nn.HSigmoid`") def construct(self, x): x = self.fake_quant_act_before(x) @@ -1209,9 +1169,9 @@ class QuantBlock(Cell): return x def extend_repr(self): - str_info = f'quant={self.quant}, core_op={type(self.core_op)}' + str_info = f'quant={self.quant}, core_op={type(self.core_op)}, weight=shape[{self.weight.shape}]' if self.has_bias: - str_info = str_info + f', bias={self.bias}' + str_info = str_info + f', bias=shape[{self.bias.shape}]' if self.has_act: str_info = str_info + f', activation={self.activation}' str_info = str_info + f', dequant={self.dequant}' diff --git a/mindspore/nn/optim/__init__.py b/mindspore/nn/optim/__init__.py index f1dac586bc9..538c4000678 100644 --- a/mindspore/nn/optim/__init__.py +++ b/mindspore/nn/optim/__init__.py @@ -20,14 +20,14 @@ The optimizer is used to calculate and update the gradients. 
""" from .optimizer import Optimizer from .momentum import Momentum -from .adam import Adam, AdamWeightDecay, AdamWeightDecayDynamicLR +from .adam import Adam, PSAdam, AdamWeightDecay, AdamWeightDecayDynamicLR from .lamb import Lamb from .sgd import SGD from .lars import LARS -from .ftrl import FTRL +from .ftrl import FTRL, PSFTRL from .rmsprop import RMSProp from .proximal_ada_grad import ProximalAdagrad from .lazyadam import LazyAdam -__all__ = ['Optimizer', 'Momentum', 'LARS', 'Adam', 'AdamWeightDecay', 'LazyAdam', - 'AdamWeightDecayDynamicLR', 'Lamb', 'SGD', 'FTRL', 'RMSProp', 'ProximalAdagrad'] +__all__ = ['Optimizer', 'Momentum', 'LARS', 'Adam', 'PSAdam', 'AdamWeightDecay', 'LazyAdam', + 'AdamWeightDecayDynamicLR', 'Lamb', 'SGD', 'FTRL', 'PSFTRL', 'RMSProp', 'ProximalAdagrad'] diff --git a/mindspore/nn/optim/adam.py b/mindspore/nn/optim/adam.py index b73c284aab7..eb6e64074f0 100755 --- a/mindspore/nn/optim/adam.py +++ b/mindspore/nn/optim/adam.py @@ -27,6 +27,7 @@ from mindspore._checkparam import Rel from .optimizer import Optimizer _adam_opt = C.MultitypeFuncGraph("adam_opt") +_adam_push_pull_opt = C.MultitypeFuncGraph("_adam_push_pull_opt") @_adam_opt.register("Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", @@ -129,6 +130,31 @@ def _run_opt_with_one_number(opt, sparse_opt, beta1_power, beta2_power, beta1, b eps, gradient)) return success +@_adam_push_pull_opt.register("Function", "Function", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", + "Tensor", "Tuple", "Tensor", "Tensor", "Tensor") +def _run_push_pull_opt_with_sparse(push, pull, beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, params, + moment1, moment2): + """Apply sparse adam optimizer by push and pull to the weight parameter when the gradient is sparse.""" + success = True + op_shape = P.Shape() + shapes = (op_shape(params), op_shape(moment1), op_shape(moment2), + op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1), + 
op_shape(beta2), op_shape(eps), op_shape(gradient[1]), op_shape(gradient[0])) + success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, + eps, gradient[1], gradient[0]), shapes), params)) + return success + + +@_adam_push_pull_opt.register("Function", "Function", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", + "Tensor", "Tensor", "Tensor", "Tensor", "Tensor") +def _run_push_pull_opt_with_one_number(push, pull, beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, params, + moment1, moment2): + """Apply adam optimizer by push and pull to the weight parameter using Tensor.""" + success = True + op_shape = P.Shape() + success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient), + (op_shape(params), op_shape(moment1), op_shape(moment2))), params)) + return success class Adam(Optimizer): r""" @@ -162,8 +188,8 @@ class Adam(Optimizer): To improve parameter groups performance, the customized order of parameters can be supported. - The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the - `sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse + The sparse strategy is applied while the SparseGatherV2 operator being used for forward network. + The sparse feature is under continuous development. The sparse behavior is currently performed on the CPU. 
Args: @@ -274,6 +300,51 @@ class Adam(Optimizer): gradients, params, moment1, moment2) return success +class PSAdam(Optimizer): + '''The same usage as Adam optimizer except the parameters are set PS mode.''' + def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-8, use_locking=False, + use_nesterov=False, weight_decay=0.0, loss_scale=1.0): + super(PSAdam, self).__init__(learning_rate, params, weight_decay, loss_scale) + _check_param_value(beta1, beta2, eps, weight_decay, self.cls_name) + validator.check_value_type("use_locking", use_locking, [bool], self.cls_name) + validator.check_value_type("use_nesterov", use_nesterov, [bool], self.cls_name) + + self.beta1 = Tensor(beta1, mstype.float32) + self.beta2 = Tensor(beta2, mstype.float32) + self.beta1_power = Parameter(initializer(1, [1], mstype.float32), name="beta1_power") + self.beta2_power = Parameter(initializer(1, [1], mstype.float32), name="beta2_power") + self.eps = Tensor(eps, mstype.float32) + + self.moment1 = self.parameters.clone(prefix="moment1", init='zeros') + self.moment2 = self.parameters.clone(prefix="moment2", init='zeros') + + self.hyper_map = C.HyperMap() + self.push = P.Push("Adam", [0, 1, 2]) + self.push.add_prim_attr("primitive_target", "CPU") + self.pull = P.Pull() + self.pull.add_prim_attr("primitive_target", "CPU") + + def construct(self, gradients): + params = self.parameters + moment1 = self.moment1 + moment2 = self.moment2 + gradients = self.decay_weight(gradients) + gradients = self.scale_grad(gradients) + lr = self.get_lr() + + beta1_power = self.beta1_power * self.beta1 + self.beta1_power = beta1_power + beta2_power = self.beta2_power * self.beta2 + self.beta2_power = beta2_power + if self.is_group_lr: + success = self.map_(F.partial(_adam_push_pull_opt, self.push, self.pull, beta1_power, beta2_power, + self.beta1, self.beta2, self.eps), + lr, gradients, params, moment1, moment2) + else: + success = self.map_(F.partial(_adam_push_pull_opt, self.push, self.pull, 
beta1_power, beta2_power, + self.beta1, self.beta2, self.eps, lr), + gradients, params, moment1, moment2) + return success class AdamWeightDecay(Optimizer): """ @@ -388,7 +459,7 @@ class AdamWeightDecayDynamicLR(Optimizer): beta2=0.999, eps=1e-6, weight_decay=0.0, - decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name): + decay_filter=lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower()): super(AdamWeightDecayDynamicLR, self).__init__(0.0, params) if self.is_group: raise RuntimeError(f"The {self.cls_name} optimizer cannot support group setting.") diff --git a/mindspore/nn/optim/ftrl.py b/mindspore/nn/optim/ftrl.py index b2954430b4c..dd2ebddfa78 100644 --- a/mindspore/nn/optim/ftrl.py +++ b/mindspore/nn/optim/ftrl.py @@ -22,6 +22,7 @@ from mindspore._checkparam import Rel from .optimizer import Optimizer, _apply_decay, _grad_scale _ftrl_opt = C.MultitypeFuncGraph("ftrl_opt") +_ftrl_push_pull_opt = C.MultitypeFuncGraph("ftrl_opt") @_ftrl_opt.register("Function", "Function", "Tensor", "Number", "Number", "Number", "Tensor", "Tuple", "Tensor", @@ -41,6 +42,26 @@ def _tensor_run_opt(opt, spars_opt, learning_rate, l1, l2, lr_power, linear, gra success = F.depend(success, opt(weight, moment, linear, gradient, learning_rate, l1, l2, lr_power)) return success +@_ftrl_push_pull_opt.register("Function", "Function", "Tensor", "Number", "Number", "Number", "Tensor", "Tuple", + "Tensor", "Tensor") +def _tensor_run_push_pull_opt_with_sparse(push, pull, learning_rate, l1, l2, lr_power, linear, gradient, + weight, moment): + success = True + op_shape = P.Shape() + shapes = (op_shape(weight), op_shape(moment), op_shape(linear), op_shape(gradient[1]), op_shape(gradient[0])) + success = F.depend(success, pull(push((gradient[1], gradient[0]), shapes), weight)) + return success + + +@_ftrl_push_pull_opt.register("Function", "Function", "Tensor", "Number", "Number", "Number", "Tensor", "Tensor", + "Tensor", "Tensor") +def 
_tensor_run_push_pull_opt_with_one_number(push, pull, learning_rate, l1, l2, lr_power, linear, gradient, + weight, moment): + success = True + op_shape = P.Shape() + success = F.depend(success, pull(push((gradient, learning_rate, l1, l2, lr_power), + (op_shape(weight), op_shape(moment), op_shape(linear))), weight)) + return success def _check_param(initial_accum, lr_power, l1, l2, use_locking, weight_decay=0.0, prim_name=None): """Check param.""" @@ -72,8 +93,8 @@ class FTRL(Optimizer): `_ for engineering document. Note: - The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the - `sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse + The sparse strategy is applied while the SparseGatherV2 operator being used for forward network. + The sparse feature is under continuous development. The sparse behavior is currently performed on the CPU. Args: @@ -131,3 +152,37 @@ class FTRL(Optimizer): success = self.map_(F.partial(_ftrl_opt, self.opt, self.sparse_opt, lr, self.l1, self.l2, self.lr_power), linear, grads, params, moments) return success + +class PSFTRL(Optimizer): + def __init__(self, params, initial_accum=0.1, learning_rate=0.001, lr_power=-0.5, l1=0.0, l2=0.0, + use_locking=False, loss_scale=1.0, weight_decay=0.0): + super(PSFTRL, self).__init__(learning_rate, params, loss_scale=loss_scale) + if self.is_group: + raise RuntimeError(f"The {self.cls_name} optimizer cannot support group setting.") + _check_param(initial_accum, lr_power, l1, l2, use_locking, weight_decay, self.cls_name) + self.moments = self.parameters.clone(prefix="moments", init=initial_accum) + self.linear = self.parameters.clone(prefix="linear", init='zeros') + self.l1 = l1 + self.l2 = l2 + self.lr_power = lr_power + self.weight_decay = weight_decay + self.decay_tf = tuple((lambda: True)() for x in self.parameters) + + self.hyper_map = C.HyperMap() + self.push = P.Push("Ftrl", [0, 1, 2]) + 
self.push.add_prim_attr("primitive_target", "CPU") + self.pull = P.Pull() + self.pull.add_prim_attr("primitive_target", "CPU") + + def construct(self, grads): + params = self.parameters + moments = self.moments + linear = self.linear + lr = self.learning_rate + if self.weight_decay > 0.0: + grads = self.hyper_map(F.partial(_apply_decay, self.weight_decay), self.decay_tf, params, grads) + + grads = self.scale_grad(grads) + success = self.map_(F.partial(_ftrl_push_pull_opt, self.push, self.pull, lr, self.l1, self.l2, self.lr_power), + linear, grads, params, moments) + return success diff --git a/mindspore/nn/optim/lazyadam.py b/mindspore/nn/optim/lazyadam.py index 4b97d2eb20b..79053984374 100644 --- a/mindspore/nn/optim/lazyadam.py +++ b/mindspore/nn/optim/lazyadam.py @@ -91,8 +91,8 @@ class LazyAdam(Optimizer): value of weight_decay > 0. When not separating parameter groups, the `weight_decay` in the API will be applied on the parameters if `weight_decay` > 0 and the 'beta' and 'gamma' are not in the name of parameters. - The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the - `sparse_grad` of `Parameter` being set. The sparse behavior, to be notice, is not equivalent to the + The sparse strategy is applied while the SparseGatherV2 operator being used for forward network. + The sparse behavior, to be notice, is not equivalent to the original Adam algorithm, as only the current indices parames will be updated. The sparse feature is under continuous development. The sparse behavior is currently performed on the CPU. diff --git a/mindspore/nn/optim/proximal_ada_grad.py b/mindspore/nn/optim/proximal_ada_grad.py index 75f3994e2ad..25cf4380344 100644 --- a/mindspore/nn/optim/proximal_ada_grad.py +++ b/mindspore/nn/optim/proximal_ada_grad.py @@ -59,8 +59,8 @@ class ProximalAdagrad(Optimizer): `_. 
Note: - The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the - `sparse_grad` of `Parameter` being set as True. The sparse feature is under continuous development. The sparse + The sparse strategy is applied while the SparseGatherV2 operator being used for forward network. + The sparse feature is under continuous development. The sparse behavior is currently performed on the CPU. Args: @@ -71,7 +71,7 @@ class ProximalAdagrad(Optimizer): l1 (float): l1 regularization strength, must be greater than or equal to zero. Default: 0.0. l2 (float): l2 regularization strength, must be greater than or equal to zero. Default: 0.0. use_locking (bool): If True use locks for update operation. Default: False. - loss_scale (float): Value for the loss scale. It should be equal to or greater than 1.0. Default: 1.0. + loss_scale (float): Value for the loss scale. It should be greater than 0.0. Default: 1.0. wegith_decay (float): Weight decay value to multiply weight, must be zero or positive value. Default: 0.0. 
Inputs: diff --git a/mindspore/nn/optim/rmsprop.py b/mindspore/nn/optim/rmsprop.py index 8e8885aff77..c4d3347038f 100644 --- a/mindspore/nn/optim/rmsprop.py +++ b/mindspore/nn/optim/rmsprop.py @@ -171,7 +171,7 @@ class RMSProp(Optimizer): self.opt = P.ApplyRMSProp(use_locking) self.momentum = momentum - self.ms = self.parameters.clone(prefix="mean_square", init='zeros') + self.ms = self.parameters.clone(prefix="mean_square", init='ones') self.moment = self.parameters.clone(prefix="moment", init='zeros') self.hyper_map = C.HyperMap() self.epsilon = epsilon diff --git a/mindspore/ops/__init__.py b/mindspore/ops/__init__.py index b73d683284b..7265b3c98b6 100644 --- a/mindspore/ops/__init__.py +++ b/mindspore/ops/__init__.py @@ -32,7 +32,7 @@ Note: from .primitive import Primitive, PrimitiveWithInfer, prim_attr_register from .vm_impl_registry import get_vm_impl_fn, vm_impl_registry -from .op_info_register import op_info_register, AkgRegOp, AiCPURegOp, TBERegOp, DataType +from .op_info_register import op_info_register, AkgGpuRegOp, AkgAscendRegOp, AiCPURegOp, TBERegOp, DataType from .primitive import constexpr from .._c_expression import signature_rw, signature_kind @@ -42,6 +42,6 @@ __primitive__ = [ ] __all__ = ["get_vm_impl_fn", "vm_impl_registry", - "op_info_register", "AkgRegOp", "AiCPURegOp", "TBERegOp", "DataType", + "op_info_register", "AkgGpuRegOp", "AkgAscendRegOp", "AiCPURegOp", "TBERegOp", "DataType", "constexpr"] __all__.extend(__primitive__) diff --git a/mindspore/ops/_grad/grad_array_ops.py b/mindspore/ops/_grad/grad_array_ops.py index d1494bc0517..b1a3e1d98b7 100644 --- a/mindspore/ops/_grad/grad_array_ops.py +++ b/mindspore/ops/_grad/grad_array_ops.py @@ -191,13 +191,12 @@ def get_bprop_tile(self): return bprop -@bprop_getters.register(inner.EmbeddingLookup) +@bprop_getters.register(P.EmbeddingLookup) def get_bprop_embedding_lookup(self): """Generate bprop for EmbeddingLookup""" sub_op = P.Sub() reshape_op = P.Reshape() - host_reshape = 
P.Reshape().add_prim_attr('primitive_target', 'CPU') - def bprop_sparse(x, indices, offset, reduce_scatter_flag, split_num, out, dout): + def bprop_sparse(x, indices, offset, out, dout): x_shp = shape_op(x) new_indices = sub_op(indices, offset) # Reshape the 'new_indices' @@ -205,17 +204,9 @@ def get_bprop_embedding_lookup(self): new_indices = reshape_op(new_indices, new_indices_shape_changed) x_shp_tail = x_shp[1:] actual_dout_shape_changed = new_indices_shape_changed + x_shp_tail - if reduce_scatter_flag is True: - # On host - elu_grad = G.EmbeddingLookupCommGrad() - actual_dout = elu_grad(dout, split_num) - # Reshape the 'actual_dout' on host - actual_dout = host_reshape(actual_dout, actual_dout_shape_changed) - else: - # Reshape the 'actual_dout' on device - actual_dout = reshape_op(dout, actual_dout_shape_changed) - return (new_indices, actual_dout, x_shp), zeros_like(indices), zeros_like(offset), \ - zeros_like(reduce_scatter_flag), zeros_like(split_num) + # Reshape the 'actual_dout' on device + actual_dout = reshape_op(dout, actual_dout_shape_changed) + return (new_indices, actual_dout, x_shp), zeros_like(indices), zeros_like(offset) return bprop_sparse @@ -248,19 +239,37 @@ def get_bprop_transpose(self): return bprop +@constexpr +def _concat_grad_uniform(input_shapes, input_nums): + """Helper function for bprop of Concat""" + is_uniform = True + for i in range(1, input_nums): + if input_shapes[i-1] != input_shapes[i]: + is_uniform = False + break + return is_uniform + @bprop_getters.register(P.Concat) def get_bprop_concat(self): """Generate bprop for Concat""" axis = self.axis + is_ascend = context.get_context('device_target') == "Ascend" def bprop(x, out, dout): dx = () out_offset = G.ConcatOffset(F.tuple_len(x), axis)(x) - for i in range(F.tuple_len(x)): - slice_out = P.Slice()(dout, out_offset[i], shape_op(x[i])) - dx = dx + (slice_out,) + input_nums = F.tuple_len(x) + input_shapes = () + for i in range(input_nums): + input_shapes = input_shapes + 
(shape_op(x[i]),) + is_uniform = _concat_grad_uniform(input_shapes, input_nums) + if is_uniform and is_ascend: + dx = P.Split(axis, input_nums)(dout) + else: + for i in range(input_nums): + slice_out = P.Slice()(dout, out_offset[i], input_shapes[i]) + dx = dx + (slice_out,) return (dx,) - return bprop @@ -644,6 +653,36 @@ def get_bprop_unsorted_segment_min(self): return bprop +@bprop_getters.register(P.UnsortedSegmentProd) +def get_bprop_unsorted_segment_prod(self): + """Generate bprop for UnsortedSegmentProd""" + equal = P.Equal() + cast = P.Cast() + select = P.Select() + gather = P.GatherV2() + greater = P.Greater() + ones_like = P.OnesLike() + maximum = P.Maximum() + unsorted_segment_prod = P.UnsortedSegmentProd() + + def bprop(x, segment_ids, num_segments, out, dout): + is_zero = equal(x, 0) + num_zero = unsorted_segment_sum(cast(is_zero, mstype.int32), segment_ids, num_segments) + grad = select(greater(num_zero, 1), zeros_like(dout), dout) + non_zero_data = select(is_zero, ones_like(x), x) + non_zero_prod = unsorted_segment_prod(non_zero_data, segment_ids, num_segments) + zero_clipped_indices = maximum(segment_ids, zeros_like(segment_ids)) + gathered_prod = gather(out, zero_clipped_indices, 0) + gathered_non_zero_prod = gather(non_zero_prod, zero_clipped_indices, 0) + prod_divided_by_x = gathered_prod / x + partial_derivative = select(is_zero, gathered_non_zero_prod, prod_divided_by_x) + gathered_grad, _, _ = _GatherDropNegatives(grad, segment_ids, zero_clipped_indices) + dx = gathered_grad * partial_derivative + return dx, zeros_like(segment_ids), zeros_like(num_segments) + + return bprop + + @bprop_getters.register(P.SpaceToBatch) def get_bprop_space_to_batch(self): """Generate bprop for SpaceToBatch""" diff --git a/mindspore/ops/_grad/grad_nn_ops.py b/mindspore/ops/_grad/grad_nn_ops.py index 3a86a05943a..61c7e40960c 100755 --- a/mindspore/ops/_grad/grad_nn_ops.py +++ b/mindspore/ops/_grad/grad_nn_ops.py @@ -760,6 +760,19 @@ def get_bprop_ctc_loss(self): 
return bprop +@bprop_getters.register(P.CTCLossV2) +def get_bprop_ctc_loss_v2(self): + """Grad definition for `CTCLossV2` operation""" + expand = P.ExpandDims() + + def bprop(inputs, labels, input_lengths, labels_lengths, out, dout): + grad_loss = out[1] + grad = grad_loss * expand(dout[0], -1) + return grad, zeros_like(labels), zeros_like(input_lengths), zeros_like(labels_lengths) + + return bprop + + @bprop_getters.register(P.BasicLSTMCell) def get_bprop_basic_lstm_cell(self): """Grad definition for `BasicLSTMCell` operation.""" diff --git a/mindspore/ops/_op_impl/__init__.py b/mindspore/ops/_op_impl/__init__.py index 65a12cd73c0..59729f833f4 100644 --- a/mindspore/ops/_op_impl/__init__.py +++ b/mindspore/ops/_op_impl/__init__.py @@ -17,7 +17,7 @@ import platform from .aicpu import * if "Windows" not in platform.system(): - from .akg.gpu import * + from .akg import * from .tbe import * __all__ = [] diff --git a/mindspore/ops/_op_impl/akg/__init__.py b/mindspore/ops/_op_impl/akg/__init__.py index fd86dbf9991..c4c70b7aa1a 100644 --- a/mindspore/ops/_op_impl/akg/__init__.py +++ b/mindspore/ops/_op_impl/akg/__init__.py @@ -13,77 +13,6 @@ # limitations under the License. 
# ============================================================================ -"""autodiff ops""" -from .abs import _abs_akg -from .add_n import _add_n_akg -from .add import _add_akg -from .apply_momentum import _apply_momentum_akg -from .assign import _assign_akg -from .inplace_assign import _inplace_assign_akg -from .assign_add import _assign_add_akg -from .bias_add_grad import _bias_add_grad_akg -from .bias_add import _bias_add_akg -from .cast import _cast_akg -from .clear_zero import _clear_zero_akg -from .conv_bn1 import _conv_bn1_akg -from .conv2d_backprop_filter import _conv2d_backprop_filter_akg -from .conv2d_backprop_input import _conv2d_backprop_input_akg -from .conv2d import _conv2d_akg -from .div import _div_akg -from .equal_count import _equal_count_akg -from .exp import _exp_akg -from .five2four import _five2four_akg -from .four2five import _four2five_akg -from .fused_batch_norm_grad import _fused_batch_norm_grad_akg -from .fused_batch_norm_infer import _fused_batch_norm_infer_akg -from .fused_batch_norm import _fused_batch_norm_akg -from .fused_bn1_grad import _bn1_grad_akg -from .fused_bn1 import _fused_bn1_akg -from .fused_bn2_grad import _bn2_grad_akg -from .fused_bn2 import _fused_bn2_akg -from .fused_bn3_grad import _bn3_grad_akg -from .fused_bn3 import _fused_bn3_akg -from .gather_v2 import _gather_v2_akg -from .less import _less_akg -from .log import _log_akg -from .matmul import _matmul_akg -from .batchmatmul import _batchmatmul_akg -from .max_pool_grad_with_argmax import _max_pool_grad_with_argmax_akg -from .max_pool_with_argmax import _max_pool_with_argmax_akg -from .max import _max_akg -from .maximum import _maximum_akg -from .mean_grad import _mean_grad_akg -from .mean import _mean_akg -from .minimum import _minimum_akg -from .mul import _mul_akg -from .neg import _neg_akg -from .one_hot import _one_hot_akg -from .pow import _power_akg -from .real_div import _real_div_akg -from .reciprocal import _reciprocal_akg -from .reduce_max import 
_reduce_max_akg -from .reduce_mean import _reduce_mean_akg -from .reduce_sum import _reduce_sum_akg -from .relu_grad import _relu_grad_akg -from .relu import _relu_akg -from .reshape import _reshape_akg -from .round import _round_akg -from .rsqrt import _rsqrt_akg -from .select import _select_akg -from .softmax import _softmax_akg -from .sparse_softmax_cross_entropy_with_logits import _sparse_softmax_cross_entropy_with_logits_akg -from .sqrt import _sqrt_akg -from .strided_slice import _strided_slice_akg -from .sub import _sub_akg -from .sum import _sum_akg -from .tile import _tile_akg -from .zeros_like import _zeros_like_akg -from .argmax import _argmax_akg -from .floordiv import _floor_div_akg -from .equal import _equal_akg -from .greater_equal import _greater_equal_akg -from .less_equal import _less_equal_akg -from .expand_dims import _expand_dims_akg -from .greater import _greater_akg -from .equiv_format import _equiv_format_akg +"""akg ops""" +from . import ascend from . import gpu diff --git a/mindspore/ops/_op_impl/akg/abs.py b/mindspore/ops/_op_impl/akg/abs.py deleted file mode 100644 index 8c08f405da4..00000000000 --- a/mindspore/ops/_op_impl/akg/abs.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Abs op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Abs", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _abs_akg(): - """Abs AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/add.py b/mindspore/ops/_op_impl/akg/add.py deleted file mode 100644 index 60544ea1c75..00000000000 --- a/mindspore/ops/_op_impl/akg/add.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""TensorAdd op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "TensorAdd", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "int32", "float16", "int32", "float32", "float32", - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0", - "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16", "int32", "float16", "int32", "float32", "float32", - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0", - "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "int32", "float16", "int32", "float32", "float32", - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0", - "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _add_akg(): - """TensorAdd AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/add_n.py b/mindspore/ops/_op_impl/akg/add_n.py deleted file mode 100644 index 53320f752ee..00000000000 --- a/mindspore/ops/_op_impl/akg/add_n.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""AddN op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "AddN", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16","float32","float16","float32", "float16", "float32", - "float16","float32" - ], - "format": [ - "DefaultFormat","DefaultFormat","NC1HWC0","NC1HWC0", "FracZ", "FracZ", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "dynamic", - "name": "inputs" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16","float32","float16","float32", "float16", "float32", - "float16","float32" - ], - "format": [ - "DefaultFormat","DefaultFormat","NC1HWC0","NC1HWC0", "FracZ", "FracZ", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _add_n_akg(): - """AddN AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/apply_momentum.py b/mindspore/ops/_op_impl/akg/apply_momentum.py deleted file mode 100644 index 71605718822..00000000000 --- a/mindspore/ops/_op_impl/akg/apply_momentum.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""ApplyMomentum op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "ApplyMomentum", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - { - "name": "use_nesterov", - "param_type": "optional", - "type": "bool" - }, - { - "name": "gradient_scale", - "param_type": "optional", - "type": "float" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float32","float32","float32" - ], - "format": [ - "DefaultFormat","NC1HWC0","FracZ" - ], - "name": "variable" - }, - { - "index": 1, - "dtype": [ - "float32","float32","float32" - ], - "format": [ - "DefaultFormat","NC1HWC0","FracZ" - ], - "name": "accumulation" - }, - { - "index": 2, - "dtype": [ - "float32","float32","float32" - ], - "format": [ - "DefaultFormat","DefaultFormat","DefaultFormat" - ], - "name": "learning_rate" - }, - { - "index": 3, - "dtype": [ - "float32","float32","float32" - ], - "format": [ - "DefaultFormat","NC1HWC0","FracZ" - ], - "name": "gradient" - }, - { - "index": 4, - "dtype": [ - "float32","float32","float32" - ], - "format": [ - "DefaultFormat","DefaultFormat","DefaultFormat" - ], - "name": "momentum" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float32","float32","float32" - ], - "format": [ - "DefaultFormat","NC1HWC0","FracZ" - ], - "name": "output" - } - ] -}""") -def _apply_momentum_akg(): - """ApplyMomentum AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/argmax.py 
b/mindspore/ops/_op_impl/akg/argmax.py deleted file mode 100644 index b04862cbeb1..00000000000 --- a/mindspore/ops/_op_impl/akg/argmax.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Argmax op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Argmax", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "axis", - "param_type": "optional", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "int32", "int32", "int32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _argmax_akg(): - """Argmax AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/ascend/__init__.py b/mindspore/ops/_op_impl/akg/ascend/__init__.py new file mode 100644 index 00000000000..a4d7aec7d0a --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/__init__.py @@ -0,0 +1,30 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the 
License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""__init__""" + +from .add import _add_akg +from .batchmatmul import _batchmatmul_akg +from .cast import _cast_akg +from .expand_dims import _expand_dims_akg +from .greater import _greater_akg +from .inplace_assign import _inplace_assign_akg +from .maximum import _maximum_akg +from .minimum import _minimum_akg +from .mul import _mul_akg +from .real_div import _real_div_akg +from .rsqrt import _rsqrt_akg +from .select import _select_akg +from .sqrt import _sqrt_akg +from .sub import _sub_akg diff --git a/mindspore/ops/_op_impl/akg/ascend/add.py b/mindspore/ops/_op_impl/akg/ascend/add.py new file mode 100644 index 00000000000..d8689eed6d0 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/add.py @@ -0,0 +1,42 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""TensorAdd op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("TensorAdd") \ + .fusion_type("ELEMWISE") \ + .input(0, "x") \ + .input(1, "y") \ + .output(0, "output") \ + .dtype_format(DT.F16_Default, DT.F16_Default, DT.F16_Default) \ + .dtype_format(DT.F32_Default, DT.F32_Default, DT.F32_Default) \ + .dtype_format(DT.I32_Default, DT.I32_Default, DT.I32_Default) \ + .dtype_format(DT.F16_5HD, DT.F16_5HD, DT.F16_5HD) \ + .dtype_format(DT.F32_5HD, DT.F32_5HD, DT.F32_5HD) \ + .dtype_format(DT.I32_5HD, DT.I32_5HD, DT.I32_5HD) \ + .dtype_format(DT.F16_FracZ, DT.F16_FracZ, DT.F16_FracZ) \ + .dtype_format(DT.F32_FracZ, DT.F32_FracZ, DT.F32_FracZ) \ + .dtype_format(DT.I32_FracZ, DT.I32_FracZ, DT.I32_FracZ) \ + .dtype_format(DT.F16_FracNZ, DT.F16_FracNZ, DT.F16_FracNZ) \ + .dtype_format(DT.F32_FracNZ, DT.F32_FracNZ, DT.F32_FracNZ) \ + .dtype_format(DT.I32_FracNZ, DT.I32_FracNZ, DT.I32_FracNZ) \ + .get_op_info() + + +@op_info_register(op_info) +def _add_akg(): + """TensorAdd Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/batchmatmul.py b/mindspore/ops/_op_impl/akg/ascend/batchmatmul.py new file mode 100644 index 00000000000..d7815c15e6d --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/batchmatmul.py @@ -0,0 +1,33 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""BatchMatMul op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("BatchMatMul") \ + .fusion_type("OPAQUE") \ + .input(0, "x1") \ + .input(1, "x2") \ + .output(0, "output") \ + .attr("transpose_a", "optional", "bool") \ + .attr("transpose_b", "optional", "bool") \ + .dtype_format(DT.F16_FracNZ, DT.F16_FracNZ, DT.F16_FracNZ) \ + .get_op_info() + + +@op_info_register(op_info) +def _batchmatmul_akg(): + """BatchMatMul AKG register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/cast.py b/mindspore/ops/_op_impl/akg/ascend/cast.py new file mode 100644 index 00000000000..1b874352f8a --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/cast.py @@ -0,0 +1,46 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Cast op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("Cast") \ + .fusion_type("OPAQUE") \ + .input(0, "x") \ + .output(0, "output") \ + .attr("dst_type", "required", "str") \ + .dtype_format(DT.F16_Default, DT.F32_Default) \ + .dtype_format(DT.F16_Default, DT.I32_Default) \ + .dtype_format(DT.F32_Default, DT.F16_Default) \ + .dtype_format(DT.F32_Default, DT.I32_Default) \ + .dtype_format(DT.I32_Default, DT.F16_Default) \ + .dtype_format(DT.I32_Default, DT.F32_Default) \ + .dtype_format(DT.BOOL_Default, DT.F16_Default) \ + .dtype_format(DT.BOOL_Default, DT.F32_Default) \ + .dtype_format(DT.BOOL_Default, DT.I32_Default) \ + .dtype_format(DT.F16_5HD, DT.F32_5HD) \ + .dtype_format(DT.F32_5HD, DT.F16_5HD) \ + .dtype_format(DT.BOOL_5HD, DT.I32_5HD) \ + .dtype_format(DT.BOOL_5HD, DT.F32_5HD) \ + .dtype_format(DT.F16_FracNZ, DT.F32_FracNZ) \ + .dtype_format(DT.F32_FracNZ, DT.F16_FracNZ) \ + .dtype_format(DT.BOOL_FracNZ, DT.I32_FracNZ) \ + .dtype_format(DT.BOOL_FracNZ, DT.F32_FracNZ) \ + .get_op_info() + + +@op_info_register(op_info) +def _cast_akg(): + """Cast Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/expand_dims.py b/mindspore/ops/_op_impl/akg/ascend/expand_dims.py new file mode 100644 index 00000000000..24faf241aab --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/expand_dims.py @@ -0,0 +1,33 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""ExpandDims op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("ExpandDims") \ + .fusion_type("OPAQUE") \ + .input(0, "x") \ + .output(0, "y") \ + .attr("axis", "required", "int") \ + .dtype_format(DT.F16_Default, DT.F16_Default) \ + .dtype_format(DT.F32_Default, DT.F32_Default) \ + .dtype_format(DT.I32_Default, DT.I32_Default) \ + .get_op_info() + + +@op_info_register(op_info) +def _expand_dims_akg(): + """ExpandDims Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/greater.py b/mindspore/ops/_op_impl/akg/ascend/greater.py new file mode 100644 index 00000000000..14164c895ba --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/greater.py @@ -0,0 +1,34 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Greater op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("Greater") \ + .fusion_type("ELEMWISE") \ + .input(0, "x") \ + .input(1, "y") \ + .output(0, "output") \ + .dtype_format(DT.F16_Default, DT.F16_Default, DT.BOOL_Default) \ + .dtype_format(DT.F32_Default, DT.F32_Default, DT.BOOL_Default) \ + .dtype_format(DT.F16_5HD, DT.F16_5HD, DT.BOOL_5HD) \ + .dtype_format(DT.F32_5HD, DT.F32_5HD, DT.BOOL_5HD) \ + .get_op_info() + + +@op_info_register(op_info) +def _greater_akg(): + """Greater Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/inplace_assign.py b/mindspore/ops/_op_impl/akg/ascend/inplace_assign.py new file mode 100644 index 00000000000..9f76706440e --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/inplace_assign.py @@ -0,0 +1,41 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""InplaceAssign op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("InplaceAssign") \ + .fusion_type("ELEMWISE") \ + .input(0, "x") \ + .input(1, "y") \ + .input(2, "z") \ + .output(0, "output") \ + .attr("fake_output", "optional", "bool") \ + .dtype_format(DT.F16_Default, DT.F16_Default, DT.F16_Default, DT.F16_Default) \ + .dtype_format(DT.F32_Default, DT.F32_Default, DT.F32_Default, DT.F32_Default) \ + .dtype_format(DT.I32_Default, DT.I32_Default, DT.I32_Default, DT.I32_Default) \ + .dtype_format(DT.F16_5HD, DT.F16_5HD, DT.F16_5HD, DT.F16_5HD) \ + .dtype_format(DT.F32_5HD, DT.F32_5HD, DT.F32_5HD, DT.F32_5HD) \ + .dtype_format(DT.I32_5HD, DT.I32_5HD, DT.I32_5HD, DT.I32_5HD) \ + .dtype_format(DT.F16_FracZ, DT.F16_FracZ, DT.F16_FracZ, DT.F16_FracZ) \ + .dtype_format(DT.F32_FracZ, DT.F32_FracZ, DT.F32_FracZ, DT.F32_FracZ) \ + .dtype_format(DT.I32_FracZ, DT.I32_FracZ, DT.I32_FracZ, DT.I32_FracZ) \ + .get_op_info() + + +@op_info_register(op_info) +def _inplace_assign_akg(): + """InplaceAssign Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/maximum.py b/mindspore/ops/_op_impl/akg/ascend/maximum.py new file mode 100644 index 00000000000..b57de7d15ac --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/maximum.py @@ -0,0 +1,36 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Maximum op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("Maximum") \ + .fusion_type("COMMREDUCE") \ + .input(0, "x") \ + .input(1, "y") \ + .output(0, "output") \ + .dtype_format(DT.F16_Default, DT.F16_Default, DT.F16_Default) \ + .dtype_format(DT.F32_Default, DT.F32_Default, DT.F32_Default) \ + .dtype_format(DT.I32_Default, DT.I32_Default, DT.I32_Default) \ + .dtype_format(DT.F16_5HD, DT.F16_5HD, DT.F16_5HD) \ + .dtype_format(DT.F32_5HD, DT.F32_5HD, DT.F32_5HD) \ + .dtype_format(DT.I32_5HD, DT.I32_5HD, DT.I32_5HD) \ + .get_op_info() + + +@op_info_register(op_info) +def _maximum_akg(): + """Maximum Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/minimum.py b/mindspore/ops/_op_impl/akg/ascend/minimum.py new file mode 100644 index 00000000000..cdc0abfc6d2 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/minimum.py @@ -0,0 +1,39 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Minimum op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("Minimum") \ + .fusion_type("COMMREDUCE") \ + .input(0, "x") \ + .input(1, "y") \ + .output(0, "output") \ + .dtype_format(DT.F16_Default, DT.F16_Default, DT.F16_Default) \ + .dtype_format(DT.F32_Default, DT.F32_Default, DT.F32_Default) \ + .dtype_format(DT.I32_Default, DT.I32_Default, DT.I32_Default) \ + .dtype_format(DT.F16_5HD, DT.F16_5HD, DT.F16_5HD) \ + .dtype_format(DT.F32_5HD, DT.F32_5HD, DT.F32_5HD) \ + .dtype_format(DT.I32_5HD, DT.I32_5HD, DT.I32_5HD) \ + .dtype_format(DT.F16_FracNZ, DT.F16_FracNZ, DT.F16_FracNZ) \ + .dtype_format(DT.F32_FracNZ, DT.F32_FracNZ, DT.F32_FracNZ) \ + .dtype_format(DT.I32_FracNZ, DT.I32_FracNZ, DT.I32_FracNZ) \ + .get_op_info() + + +@op_info_register(op_info) +def _minimum_akg(): + """Minimum Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/mul.py b/mindspore/ops/_op_impl/akg/ascend/mul.py new file mode 100644 index 00000000000..ea21888b842 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/mul.py @@ -0,0 +1,41 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Mul op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("Mul") \ + .fusion_type("ELEMWISE") \ + .input(0, "x") \ + .input(1, "y") \ + .output(0, "output") \ + .attr("x_shape", "required", "listInt") \ + .attr("y_shape", "required", "listInt") \ + .attr("data_format", "required", "listStr") \ + .dtype_format(DT.F16_Default, DT.F16_Default, DT.F16_Default) \ + .dtype_format(DT.F32_Default, DT.F32_Default, DT.F32_Default) \ + .dtype_format(DT.F16_5HD, DT.F16_5HD, DT.F16_5HD) \ + .dtype_format(DT.F32_5HD, DT.F32_5HD, DT.F32_5HD) \ + .dtype_format(DT.F16_FracZ, DT.F16_FracZ, DT.F16_FracZ) \ + .dtype_format(DT.F32_FracZ, DT.F32_FracZ, DT.F32_FracZ) \ + .dtype_format(DT.F16_FracNZ, DT.F16_FracNZ, DT.F16_FracNZ) \ + .dtype_format(DT.F32_FracNZ, DT.F32_FracNZ, DT.F32_FracNZ) \ + .get_op_info() + + +@op_info_register(op_info) +def _mul_akg(): + """Mul Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/real_div.py b/mindspore/ops/_op_impl/akg/ascend/real_div.py new file mode 100644 index 00000000000..c7c3ad9eb6a --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/real_div.py @@ -0,0 +1,36 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""RealDiv op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("RealDiv") \ + .fusion_type("ELEMWISE") \ + .input(0, "x") \ + .input(1, "y") \ + .output(0, "output") \ + .dtype_format(DT.F16_Default, DT.F16_Default, DT.F16_Default) \ + .dtype_format(DT.F32_Default, DT.F32_Default, DT.F32_Default) \ + .dtype_format(DT.F16_5HD, DT.F16_5HD, DT.F16_5HD) \ + .dtype_format(DT.F32_5HD, DT.F32_5HD, DT.F32_5HD) \ + .dtype_format(DT.F16_FracNZ, DT.F16_FracNZ, DT.F16_FracNZ) \ + .dtype_format(DT.F32_FracNZ, DT.F32_FracNZ, DT.F32_FracNZ) \ + .get_op_info() + + +@op_info_register(op_info) +def _real_div_akg(): + """RealDiv Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/rsqrt.py b/mindspore/ops/_op_impl/akg/ascend/rsqrt.py new file mode 100644 index 00000000000..55cf876951b --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/rsqrt.py @@ -0,0 +1,35 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Rsqrt op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("Rsqrt") \ + .fusion_type("ELEMWISE") \ + .input(0, "x") \ + .output(0, "output") \ + .dtype_format(DT.F16_Default, DT.F16_Default) \ + .dtype_format(DT.F32_Default, DT.F32_Default) \ + .dtype_format(DT.I32_Default, DT.I32_Default) \ + .dtype_format(DT.F16_5HD, DT.F16_5HD) \ + .dtype_format(DT.F32_5HD, DT.F32_5HD) \ + .dtype_format(DT.I32_5HD, DT.I32_5HD) \ + .get_op_info() + + +@op_info_register(op_info) +def _rsqrt_akg(): + """Rsqrt Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/select.py b/mindspore/ops/_op_impl/akg/ascend/select.py new file mode 100644 index 00000000000..67fee114ca3 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/select.py @@ -0,0 +1,37 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Select op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("Select") \ + .fusion_type("ELEMWISE") \ + .input(0, "condition") \ + .input(1, "x") \ + .input(2, "y") \ + .output(0, "output") \ + .dtype_format(DT.BOOL_Default, DT.F16_Default, DT.F16_Default, DT.F16_Default) \ + .dtype_format(DT.BOOL_Default, DT.F32_Default, DT.F32_Default, DT.F32_Default) \ + .dtype_format(DT.BOOL_Default, DT.I32_Default, DT.I32_Default, DT.I32_Default) \ + .dtype_format(DT.BOOL_5HD, DT.F16_5HD, DT.F16_5HD, DT.F16_5HD) \ + .dtype_format(DT.BOOL_5HD, DT.F32_5HD, DT.F32_5HD, DT.F32_5HD) \ + .dtype_format(DT.BOOL_5HD, DT.I32_5HD, DT.I32_5HD, DT.I32_5HD) \ + .get_op_info() + + +@op_info_register(op_info) +def _select_akg(): + """Select Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/sqrt.py b/mindspore/ops/_op_impl/akg/ascend/sqrt.py new file mode 100644 index 00000000000..43f64b89731 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/sqrt.py @@ -0,0 +1,35 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Sqrt op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("Sqrt") \ + .fusion_type("ELEMWISE") \ + .input(0, "x") \ + .output(0, "output") \ + .dtype_format(DT.F16_Default, DT.F16_Default) \ + .dtype_format(DT.F32_Default, DT.F32_Default) \ + .dtype_format(DT.I32_Default, DT.I32_Default) \ + .dtype_format(DT.F16_5HD, DT.F16_5HD) \ + .dtype_format(DT.F32_5HD, DT.F32_5HD) \ + .dtype_format(DT.I32_5HD, DT.I32_5HD) \ + .get_op_info() + + +@op_info_register(op_info) +def _sqrt_akg(): + """Sqrt Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/ascend/sub.py b/mindspore/ops/_op_impl/akg/ascend/sub.py new file mode 100644 index 00000000000..62001b3f447 --- /dev/null +++ b/mindspore/ops/_op_impl/akg/ascend/sub.py @@ -0,0 +1,42 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Sub op""" +from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT + +op_info = AkgAscendRegOp("Sub") \ + .fusion_type("ELEMWISE") \ + .input(0, "x") \ + .input(1, "y") \ + .output(0, "output") \ + .dtype_format(DT.F16_Default, DT.F16_Default, DT.F16_Default) \ + .dtype_format(DT.F32_Default, DT.F32_Default, DT.F32_Default) \ + .dtype_format(DT.I32_Default, DT.I32_Default, DT.I32_Default) \ + .dtype_format(DT.F16_5HD, DT.F16_5HD, DT.F16_5HD) \ + .dtype_format(DT.F32_5HD, DT.F32_5HD, DT.F32_5HD) \ + .dtype_format(DT.I32_5HD, DT.I32_5HD, DT.I32_5HD) \ + .dtype_format(DT.F16_FracZ, DT.F16_FracZ, DT.F16_FracZ) \ + .dtype_format(DT.F32_FracZ, DT.F32_FracZ, DT.F32_FracZ) \ + .dtype_format(DT.I32_FracZ, DT.I32_FracZ, DT.I32_FracZ) \ + .dtype_format(DT.F16_FracNZ, DT.F16_FracNZ, DT.F16_FracNZ) \ + .dtype_format(DT.F32_FracNZ, DT.F32_FracNZ, DT.F32_FracNZ) \ + .dtype_format(DT.I32_FracNZ, DT.I32_FracNZ, DT.I32_FracNZ) \ + .get_op_info() + + +@op_info_register(op_info) +def _sub_akg(): + """Sub Akg register""" + return diff --git a/mindspore/ops/_op_impl/akg/assign.py b/mindspore/ops/_op_impl/akg/assign.py deleted file mode 100644 index e7c5a082bdf..00000000000 --- a/mindspore/ops/_op_impl/akg/assign.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Assign op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Assign", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" - ], - "name": "ref" - }, - { - "index": 1, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" - ], - "name": "value" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" - ], - "name": "output" - } - ] -}""") -def _assign_akg(): - """Assign AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/assign_add.py b/mindspore/ops/_op_impl/akg/assign_add.py deleted file mode 100644 index 7d0d345764f..00000000000 --- a/mindspore/ops/_op_impl/akg/assign_add.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""AssignAdd op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "AssignAdd", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "ref" - }, - { - "index": 1, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "value" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _assign_add_akg(): - """AssignAdd AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/batchmatmul.py b/mindspore/ops/_op_impl/akg/batchmatmul.py deleted file mode 100644 index f5da71aa25e..00000000000 --- a/mindspore/ops/_op_impl/akg/batchmatmul.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""BatchMatMul op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "BatchMatMul", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "transpose_a", - "param_type": "optional", - "type": "bool" - }, - { - "name": "transpose_b", - "param_type": "optional", - "type": "bool" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "FRACTAL_NZ" - ], - "name": "x1" - }, - { - "index": 1, - "dtype": [ - "float16" - ], - "format": [ - "FRACTAL_NZ" - ], - "name": "x2" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _batchmatmul_akg(): - """BatchMatMul AKG register""" - return diff --git a/mindspore/ops/_op_impl/akg/bias_add.py b/mindspore/ops/_op_impl/akg/bias_add.py deleted file mode 100644 index 74f2bf7bcf5..00000000000 --- a/mindspore/ops/_op_impl/akg/bias_add.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""BiasAdd op""" - -from mindspore.ops.op_info_register import op_info_register - -@op_info_register("""{ - "op_name": "BiasAdd", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - { - "name": "data_format", - "param_type": "optional", - "type": "listStr" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16","float32","float16","float32","float16","float32" - ], - "format": [ - "NHWC","NHWC","NC1HWC0","NC1HWC0","DefaultFormat","DefaultFormat" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16","float32","float16","float32","float16","float32" - ], - "format": [ - "NHWC","NHWC","NC1HWC0","NC1HWC0","DefaultFormat","DefaultFormat" - ], - "name": "b" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16","float32","float16","float32","float16","float32" - ], - "format": [ - "DefaultFormat","DefaultFormat","NC1HWC0","NC1HWC0","DefaultFormat","DefaultFormat" - ], - "name": "output" - } - ] -}""") -def _bias_add_akg(): - """BiasAddGrad AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/bias_add_grad.py b/mindspore/ops/_op_impl/akg/bias_add_grad.py deleted file mode 100644 index 7726af6692c..00000000000 --- a/mindspore/ops/_op_impl/akg/bias_add_grad.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""BiasAddGrad op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "BiasAddGrad", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - { - "name": "data_format", - "param_type": "optional", - "type": "listStr" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16","float32","float16","float32","float16","float32" - ], - "format": [ - "NHWC","NHWC","NC1HWC0","NC1HWC0","DefaultFormat","DefaultFormat" - ], - "name": "dout" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16","float32","float16","float32","float16","float32" - ], - "format": [ - "DefaultFormat","DefaultFormat","NC1HWC0","NC1HWC0","DefaultFormat","DefaultFormat" - ], - "name": "output" - } - ] -}""") -def _bias_add_grad_akg(): - """BiasAddGrad AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/cast.py b/mindspore/ops/_op_impl/akg/cast.py deleted file mode 100644 index a78d4d87e4a..00000000000 --- a/mindspore/ops/_op_impl/akg/cast.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Cast op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Cast", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - { - "name": "dst_type", - "param_type": "required", - "type": "str" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "bool", "bool", - "float16", "float32", "int32", "int32", - "bool", - "float16", "float32", "bool", "bool", - "float16", "float32", "bool", "bool" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", - "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", - "DefaultFormat", - "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float32", "float16", "int32", "float16", - "int32", "int32", "float16", "float32", - "float32", - "float32", "float16", "int32", "float32", - "float32", "float16", "int32", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", - "DefaultFormat", "DefaultFormat", "DefaultFormat", "DefaultFormat", - "DefaultFormat", - "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _cast_akg(): - """Cast AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/clear_zero.py b/mindspore/ops/_op_impl/akg/clear_zero.py deleted file mode 100644 index 38bf35044f8..00000000000 --- a/mindspore/ops/_op_impl/akg/clear_zero.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""ClearZero op""" - -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "ClearZero", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - { - "name": "pad_mod", - "param_type": "optional", - "type": "string" - }, - { - "name": "window", - "param_type": "optional", - "type": "int" - }, - { - "name": "pad", - "param_type": "optional", - "type": "int" - }, - { - "name": "stride", - "param_type": "optional", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - ] -}""") -def _clear_zero_akg(): - """MaxPoolGradWithArgmax AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/conv2d.py b/mindspore/ops/_op_impl/akg/conv2d.py deleted file mode 100644 index 709aca70012..00000000000 --- a/mindspore/ops/_op_impl/akg/conv2d.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Conv2D op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Conv2D", - "imply_type": "AutoDiff", - "fusion_type": "CONVLUTION", - "attr": [ - { - "name": "x_shape", - "param_type": "required", - "type": "listInt" - }, - { - "name": "w_shape", - "param_type": "required", - "type": "listInt" - }, - { - "name": "pad_list", - "param_type": "required", - "type": "listInt" - }, - { - "name": "stride", - "param_type": "optional", - "type": "int" - }, - { - "name": "dilation", - "param_type": "optional", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16" - ], - "format": [ - "FracZ" - ], - "name": "w" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _conv2d_akg(): - """Conv2D AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/conv2d_backprop_filter.py b/mindspore/ops/_op_impl/akg/conv2d_backprop_filter.py deleted file mode 100644 index 1e4e4f1a1ef..00000000000 --- a/mindspore/ops/_op_impl/akg/conv2d_backprop_filter.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Conv2DBackpropFilter op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Conv2DBackpropFilter", - "imply_type": "AutoDiff", - "fusion_type": "CONVLUTION", - "attr": [ - { - "name": "input_shape", - "param_type": "required", - "type": "listInt" - }, - { - "name": "filter_sizes", - "param_type": "required", - "type": "listInt" - }, - { - "name": "stride", - "param_type": "optional", - "type": "int" - }, - { - "name": "pad_list", - "param_type": "required", - "type": "listInt" - }, - { - "name": "dilation", - "param_type": "optional", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "out_backprop" - }, - { - "index": 1, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "input" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "FracZ" - ], - "name": "output" - } - ] -}""") -def _conv2d_backprop_filter_akg(): - """Conv2DBackpropFilter AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/conv2d_backprop_input.py b/mindspore/ops/_op_impl/akg/conv2d_backprop_input.py deleted file mode 100644 index 52c7f2e7b39..00000000000 --- a/mindspore/ops/_op_impl/akg/conv2d_backprop_input.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance 
with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Conv2DBackpropInput op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Conv2DBackpropInput", - "imply_type": "AutoDiff", - "fusion_type": "CONVLUTION", - "attr": [ - { - "name": "input_sizes", - "param_type": "required", - "type": "listInt" - }, - { - "name": "filter_shape", - "param_type": "required", - "type": "listInt" - }, - { - "name": "stride", - "param_type": "optional", - "type": "int" - }, - { - "name": "pad_list", - "param_type": "required", - "type": "listInt" - }, - { - "name": "dilation", - "param_type": "optional", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "out_backprop" - }, - { - "index": 1, - "dtype": [ - "float16" - ], - "format": [ - "FracZ" - ], - "name": "filter" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _conv2d_backprop_input_akg(): - """Conv2DBackpropInput AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/conv_bn1.py b/mindspore/ops/_op_impl/akg/conv_bn1.py deleted file mode 100644 index 118c94e6fcf..00000000000 --- a/mindspore/ops/_op_impl/akg/conv_bn1.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""ConvBN1 op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "ConvBN1", - "imply_type": "AutoDiff", - "fusion_type": "CONVLUTION", - "attr": [ - { - "name": "x_shape", - "param_type": "required", - "type": "listInt" - }, - { - "name": "w_shape", - "param_type": "required", - "type": "listInt" - }, - { - "name": "pad_list", - "param_type": "required", - "type": "listInt" - }, - { - "name": "stride", - "param_type": "optional", - "type": "int" - }, - { - "name": "dilation", - "param_type": "optional", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16" - ], - "format": [ - "FracZ" - ], - "name": "w" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "conv_res_16" - }, - { - "index": 1, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "var_part" - }, - { - "index": 2, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "mean" - } - ] -}""") -def _conv_bn1_akg(): - """ConvBN1 AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/div.py b/mindspore/ops/_op_impl/akg/div.py deleted file mode 100644 index 56cdcca8684..00000000000 --- a/mindspore/ops/_op_impl/akg/div.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache 
License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Div op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Div", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _div_akg(): - """Div AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/equal.py b/mindspore/ops/_op_impl/akg/equal.py deleted file mode 100644 index 35874c62bb2..00000000000 --- a/mindspore/ops/_op_impl/akg/equal.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Equal op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Equal", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "bool", "bool", "bool", "bool", "bool", "bool" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _equal_akg(): - """Equal AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/equal_count.py b/mindspore/ops/_op_impl/akg/equal_count.py deleted file mode 100644 index 9c575db7b31..00000000000 --- a/mindspore/ops/_op_impl/akg/equal_count.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""EqualCount op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "EqualCount", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "int32" - ], - "format": [ - "DefaultFormat" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "int32" - ], - "format": [ - "DefaultFormat" - ], - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "int32" - ], - "format": [ - "DefaultFormat" - ], - "name": "output" - } - ] -}""") -def _equal_count_akg(): - """EqualCount AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/equiv_format.py b/mindspore/ops/_op_impl/akg/equiv_format.py deleted file mode 100644 index 111451b15c5..00000000000 --- a/mindspore/ops/_op_impl/akg/equiv_format.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""EquivFormat op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "EquivFormat", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "FRACTAL_NZ", "FRACTAL_NZ", "DefaultFormat", "DefaultFormat" - ], - "name": "output" - } - ] -}""") -def _equiv_format_akg(): - """EquivFormat AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/exp.py b/mindspore/ops/_op_impl/akg/exp.py deleted file mode 100644 index 273b3348a45..00000000000 --- a/mindspore/ops/_op_impl/akg/exp.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Exp op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Exp", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _exp_akg(): - """Exp AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/expand_dims.py b/mindspore/ops/_op_impl/akg/expand_dims.py deleted file mode 100644 index 9e1b18153a7..00000000000 --- a/mindspore/ops/_op_impl/akg/expand_dims.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""ExpandDims op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "ExpandDims", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "axis", - "param_type": "required", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat" - ], - "name": "y" - } - ] -}""") -def _expand_dims_akg(): - """ExpandDims AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/five2four.py b/mindspore/ops/_op_impl/akg/five2four.py deleted file mode 100644 index 1dac2c3628a..00000000000 --- a/mindspore/ops/_op_impl/akg/five2four.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Five2Four op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Five2Four", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "shape4d", - "param_type": "required", - "type": "listInt" - }, - { - "name": "dstType", - "param_type": "required", - "type": "str" - }, - { - "name": "output_format", - "param_type": "required", - "type": "str" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16","float16","float16","float32","float16","float32" - ], - "format": [ - "NC1HWC0","NC1HWC0","NC1HWC0","NC1HWC0","NC1HWC0","NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16","float16","float32","float32","float32","float32" - ], - "format": [ - "DefaultFormat","NHWC","DefaultFormat","DefaultFormat","NHWC","NHWC" - ], - "name": "output" - } - ] -}""") -def _five2four_akg(): - """Five2Four AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/floordiv.py b/mindspore/ops/_op_impl/akg/floordiv.py deleted file mode 100644 index 99e577b4be1..00000000000 --- a/mindspore/ops/_op_impl/akg/floordiv.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""FloorDiv op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "FloorDiv", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "int32", "int32", "int32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _floor_div_akg(): - """FloorDiv AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/four2five.py b/mindspore/ops/_op_impl/akg/four2five.py deleted file mode 100644 index 01b6f857151..00000000000 --- a/mindspore/ops/_op_impl/akg/four2five.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Four2Five op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Four2Five", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "data_format", - "param_type": "optional", - "type": "listStr" - }, - { - "name": "dst_type", - "param_type": "required", - "type": "str" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float32", "float16","float32", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NHWC", "NHWC", "NHWC" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float16", "float32", "float16", "float16", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _four2five_akg(): - """Four2Five AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/fused_batch_norm.py b/mindspore/ops/_op_impl/akg/fused_batch_norm.py deleted file mode 100644 index 5ce9839328e..00000000000 --- a/mindspore/ops/_op_impl/akg/fused_batch_norm.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""FusedBatchNorm op""" - -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "FusedBatchNorm", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "momentum", - "param_type": "optional", - "type": "float" - }, - { - "name": "epsilon", - "param_type": "optional", - "type": "float" - }, - { - "name": "data_format", - "param_type": "optional", - "type": "listStr" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "scale" - }, - { - "index": 2, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "b" - }, - { - "index": 3, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "mean" - }, - { - "index": 4, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "variance" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "y" - }, - { - "index": 1, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "running_mean" - }, - { - "index": 2, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "running_variance" - }, - { - "index": 3, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "save_mean" - }, - { - "index": 4, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "save_inv_variance" - } - ] -}""") -def _fused_batch_norm_akg(): - """FusedBatchNorm AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/fused_batch_norm_grad.py b/mindspore/ops/_op_impl/akg/fused_batch_norm_grad.py deleted file mode 100644 index 9191548f731..00000000000 --- a/mindspore/ops/_op_impl/akg/fused_batch_norm_grad.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright 2020 Huawei 
Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""FusedBatchNormGrad op""" - -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "FusedBatchNormGrad", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "data_format", - "param_type": "optional", - "type": "listStr" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "dy" - }, - { - "index": 1, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "x" - }, - { - "index": 2, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "scale" - }, - { - "index": 3, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "save_mean" - }, - { - "index": 4, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "save_inv_variance" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "dx" - }, - { - "index": 1, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "bn_scale" - }, - { - "index": 2, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "bn_bias" - } - ] -}""") -def _fused_batch_norm_grad_akg(): - """BiasAddGrad AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/fused_batch_norm_infer.py 
b/mindspore/ops/_op_impl/akg/fused_batch_norm_infer.py deleted file mode 100644 index 1e7743fa8f5..00000000000 --- a/mindspore/ops/_op_impl/akg/fused_batch_norm_infer.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""FusedBatchNormInfer op""" - -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "FusedBatchNormInfer", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "momentum", - "param_type": "optional", - "type": "float" - }, - { - "name": "epsilon", - "param_type": "optional", - "type": "float" - }, - { - "name": "data_format", - "param_type": "optional", - "type": "listStr" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "scale" - }, - { - "index": 2, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "b" - }, - { - "index": 3, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "mean" - }, - { - "index": 4, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "variance" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "y" - } - ] -}""") -def 
_fused_batch_norm_infer_akg(): - """FusedBatchNormInfer AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/fused_bn1.py b/mindspore/ops/_op_impl/akg/fused_bn1.py deleted file mode 100644 index fdaa673f257..00000000000 --- a/mindspore/ops/_op_impl/akg/fused_bn1.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""FusedBN1 op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "FusedBN1", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "data" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float32", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "output" - }, - { - "index": 1, - "dtype": [ - "float32", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _fused_bn1_akg(): - """FusedBN1 AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/fused_bn1_grad.py b/mindspore/ops/_op_impl/akg/fused_bn1_grad.py deleted file mode 100644 index 8de6796d6f2..00000000000 --- a/mindspore/ops/_op_impl/akg/fused_bn1_grad.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed 
under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""BNGrad1 op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "BNGrad1", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "dy" - }, - { - "index": 1, - "dtype": [ - "float16", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "data" - },{ - "index": 2, - "dtype": [ - "float32", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "mean" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float32", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "output" - }, - { - "index": 1, - "dtype": [ - "float32", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "output" - }, - { - "index": 2, - "dtype": [ - "float32", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _bn1_grad_akg(): - """BNGrad1 AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/fused_bn2.py b/mindspore/ops/_op_impl/akg/fused_bn2.py deleted file mode 100644 index e26a5ad8a06..00000000000 --- a/mindspore/ops/_op_impl/akg/fused_bn2.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, 
Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""FusedBN2 op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "FusedBN2", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - { - "name": "momentum", - "param_type": "optional", - "type": "float" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "mean" - }, - { - "index": 1, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "var_part" - }, - { - "index": 2, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "running_mean" - }, - { - "index": 3, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "running_var" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - }, - { - "index": 1, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - }, - { - "index": 2, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _fused_bn2_akg(): - """FusedBN2 AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/fused_bn2_grad.py b/mindspore/ops/_op_impl/akg/fused_bn2_grad.py deleted file mode 100644 index e29a9177b61..00000000000 --- a/mindspore/ops/_op_impl/akg/fused_bn2_grad.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright 2020 Huawei 
Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""BNGrad1 op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "BNGrad2", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - { - "name": "eps", - "param_type": "optional", - "type": "float" - }, - { - "name": "data_shape", - "param_type": "optional", - "type": "listInt" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "dgamma_red_hw" - }, - { - "index": 1, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "dbeta_red_hw" - },{ - "index": 2, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "variance" - }, - { - "index": 3, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "gamma" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - }, - { - "index": 1, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - }, - { - "index": 2, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - }, - { - "index": 3, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - }, - { - "index": 4, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - } - ] -}""") 
-def _bn2_grad_akg(): - """BNGrad2 AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/fused_bn3.py b/mindspore/ops/_op_impl/akg/fused_bn3.py deleted file mode 100644 index 74f3f652f36..00000000000 --- a/mindspore/ops/_op_impl/akg/fused_bn3.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""FusedBN3 op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "FusedBN3", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - { - "name": "eps", - "param_type": "optional", - "type": "float" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "data" - }, - { - "index": 1, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "mean" - },{ - "index": 2, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "variance" - },{ - "index": 3, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "gamma" - },{ - "index": 4, - "dtype": [ - "float32" - ], - "format": [ - "NC1HWC0" - ], - "name": "beta" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _fused_bn3_akg(): - """FusedBN3 AutoDiff register""" - return diff --git 
a/mindspore/ops/_op_impl/akg/fused_bn3_grad.py b/mindspore/ops/_op_impl/akg/fused_bn3_grad.py deleted file mode 100644 index 5ffc57a68e2..00000000000 --- a/mindspore/ops/_op_impl/akg/fused_bn3_grad.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""BNGrad3 op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "BNGrad3", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "dy" - }, - { - "index": 1, - "dtype": [ - "float32", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "rs" - },{ - "index": 2, - "dtype": [ - "float32", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "dgamma_dx" - }, - { - "index": 3, - "dtype": [ - "float32", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "dbeta_dx" - }, - { - "index": 4, - "dtype": [ - "float32", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "data_minus_mean" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _bn3_grad_akg(): - """BNGrad3 AutoDiff register""" - return diff --git 
a/mindspore/ops/_op_impl/akg/gather_v2.py b/mindspore/ops/_op_impl/akg/gather_v2.py deleted file mode 100644 index 84ab7eb6696..00000000000 --- a/mindspore/ops/_op_impl/akg/gather_v2.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""GatherV2 op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "GatherV2", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - { - "name": "axis", - "param_type": "optional", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat" - ], - "name": "params" - }, - { - "index": 1, - "dtype": [ - "int32", "int32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat" - ], - "name": "indices" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat" - ], - "name": "output" - } - ] -}""") -def _gather_v2_akg(): - """GatherV2 AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/gpu/cast.py b/mindspore/ops/_op_impl/akg/gpu/cast.py index 2f31dab1bac..c8aef249cd9 100644 --- a/mindspore/ops/_op_impl/akg/gpu/cast.py +++ b/mindspore/ops/_op_impl/akg/gpu/cast.py @@ 
-13,15 +13,16 @@ # limitations under the License. """Cast op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -cast_op_info = AkgRegOp("Cast") \ +cast_op_info = AkgGpuRegOp("Cast") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .output(0, "output") \ .attr("dst_type", "required", "str") \ .dtype_format(DataType.F16_Default, DataType.F32_Default) \ .dtype_format(DataType.F32_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_Default, DataType.I32_Default) \ .dtype_format(DataType.I32_Default, DataType.F32_Default) \ .dtype_format(DataType.BOOL_Default, DataType.F32_Default) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/akg/gpu/equal.py b/mindspore/ops/_op_impl/akg/gpu/equal.py index fa20392411d..40a3590f617 100644 --- a/mindspore/ops/_op_impl/akg/gpu/equal.py +++ b/mindspore/ops/_op_impl/akg/gpu/equal.py @@ -13,9 +13,9 @@ # limitations under the License. """Equal op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -equal_op_info = AkgRegOp("Equal") \ +equal_op_info = AkgGpuRegOp("Equal") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .input(1, "y") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/greater_equal.py b/mindspore/ops/_op_impl/akg/gpu/greater_equal.py index b000cbd0e34..666c939b4b0 100644 --- a/mindspore/ops/_op_impl/akg/gpu/greater_equal.py +++ b/mindspore/ops/_op_impl/akg/gpu/greater_equal.py @@ -13,9 +13,9 @@ # limitations under the License. 
"""GreaterEqual op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -greater_equal_op_info = AkgRegOp("GreaterEqual") \ +greater_equal_op_info = AkgGpuRegOp("GreaterEqual") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .input(1, "y") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/hsigmoid.py b/mindspore/ops/_op_impl/akg/gpu/hsigmoid.py index 4e802c1cadf..34e1e7f14ac 100644 --- a/mindspore/ops/_op_impl/akg/gpu/hsigmoid.py +++ b/mindspore/ops/_op_impl/akg/gpu/hsigmoid.py @@ -13,9 +13,9 @@ # limitations under the License. """HSigmoid op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -hsigmoid_op_info = AkgRegOp("HSigmoid") \ +hsigmoid_op_info = AkgGpuRegOp("HSigmoid") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .output(0, "output") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/hsigmoid_grad.py b/mindspore/ops/_op_impl/akg/gpu/hsigmoid_grad.py index 39b819138e6..5e08ffb41cb 100644 --- a/mindspore/ops/_op_impl/akg/gpu/hsigmoid_grad.py +++ b/mindspore/ops/_op_impl/akg/gpu/hsigmoid_grad.py @@ -13,9 +13,9 @@ # limitations under the License. """HSigmoidGrad op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -hsigmoidgrad_op_info = AkgRegOp("HSigmoidGrad") \ +hsigmoidgrad_op_info = AkgGpuRegOp("HSigmoidGrad") \ .fusion_type("OPAQUE") \ .input(0, "y_grad") \ .input(1, "x") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/hswish.py b/mindspore/ops/_op_impl/akg/gpu/hswish.py index 29f20bafae6..77d2c3b50c5 100644 --- a/mindspore/ops/_op_impl/akg/gpu/hswish.py +++ b/mindspore/ops/_op_impl/akg/gpu/hswish.py @@ -13,9 +13,9 @@ # limitations under the License. 
"""HSwish op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -hswish_op_info = AkgRegOp("HSwish") \ +hswish_op_info = AkgGpuRegOp("HSwish") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .output(0, "output") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/hswish_grad.py b/mindspore/ops/_op_impl/akg/gpu/hswish_grad.py index 38e8c78e282..3857486f0cd 100644 --- a/mindspore/ops/_op_impl/akg/gpu/hswish_grad.py +++ b/mindspore/ops/_op_impl/akg/gpu/hswish_grad.py @@ -13,9 +13,9 @@ # limitations under the License. """HSwishGrad op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -hswish_grad_op_info = AkgRegOp("HSwishGrad") \ +hswish_grad_op_info = AkgGpuRegOp("HSwishGrad") \ .fusion_type("OPAQUE") \ .input(0, "y_grad") \ .input(1, "x") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/lessequal.py b/mindspore/ops/_op_impl/akg/gpu/lessequal.py index a8babf7ae41..58c9c7f90a4 100644 --- a/mindspore/ops/_op_impl/akg/gpu/lessequal.py +++ b/mindspore/ops/_op_impl/akg/gpu/lessequal.py @@ -13,9 +13,9 @@ # limitations under the License. """LessEqual op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -lessequal_op_info = AkgRegOp("LessEqual") \ +lessequal_op_info = AkgGpuRegOp("LessEqual") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .input(1, "y") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/logical_and.py b/mindspore/ops/_op_impl/akg/gpu/logical_and.py index da5b6965120..58abcd8064b 100644 --- a/mindspore/ops/_op_impl/akg/gpu/logical_and.py +++ b/mindspore/ops/_op_impl/akg/gpu/logical_and.py @@ -13,9 +13,9 @@ # limitations under the License. 
"""LogicalAnd op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -logicaland_op_info = AkgRegOp("LogicalAnd") \ +logicaland_op_info = AkgGpuRegOp("LogicalAnd") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .input(1, "y") \ @@ -23,6 +23,7 @@ logicaland_op_info = AkgRegOp("LogicalAnd") \ .dtype_format(DataType.BOOL_Default, DataType.BOOL_Default, DataType.BOOL_Default) \ .get_op_info() + @op_info_register(logicaland_op_info) def _logical_and_akg(): """LogicalAnd register""" diff --git a/mindspore/ops/_op_impl/akg/gpu/logical_not.py b/mindspore/ops/_op_impl/akg/gpu/logical_not.py index 4b3c7bf647f..33815f489a9 100644 --- a/mindspore/ops/_op_impl/akg/gpu/logical_not.py +++ b/mindspore/ops/_op_impl/akg/gpu/logical_not.py @@ -13,15 +13,16 @@ # limitations under the License. """LogicalNot op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -logical_not_op_info = AkgRegOp("LogicalNot") \ +logical_not_op_info = AkgGpuRegOp("LogicalNot") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .output(0, "output") \ .dtype_format(DataType.BOOL_Default, DataType.BOOL_Default) \ .get_op_info() + @op_info_register(logical_not_op_info) def _logical_not_akg(): """LogicalNot AutoDiff register""" diff --git a/mindspore/ops/_op_impl/akg/gpu/logical_or.py b/mindspore/ops/_op_impl/akg/gpu/logical_or.py index 3a642511c61..163674ac2a0 100644 --- a/mindspore/ops/_op_impl/akg/gpu/logical_or.py +++ b/mindspore/ops/_op_impl/akg/gpu/logical_or.py @@ -13,9 +13,9 @@ # limitations under the License. 
"""LogicalOr op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -logicalor_op_info = AkgRegOp("LogicalOr") \ +logicalor_op_info = AkgGpuRegOp("LogicalOr") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .input(1, "y") \ @@ -23,6 +23,7 @@ logicalor_op_info = AkgRegOp("LogicalOr") \ .dtype_format(DataType.BOOL_Default, DataType.BOOL_Default, DataType.BOOL_Default) \ .get_op_info() + @op_info_register(logicalor_op_info) def _logical_or_akg(): """LogicalOr register""" diff --git a/mindspore/ops/_op_impl/akg/gpu/mean.py b/mindspore/ops/_op_impl/akg/gpu/mean.py index b46b701b917..dd997ec0f11 100644 --- a/mindspore/ops/_op_impl/akg/gpu/mean.py +++ b/mindspore/ops/_op_impl/akg/gpu/mean.py @@ -13,9 +13,9 @@ # limitations under the License. """SimpleMean op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -mean_op_info = AkgRegOp("SimpleMean") \ +mean_op_info = AkgGpuRegOp("SimpleMean") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .output(0, "output") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/mean_grad.py b/mindspore/ops/_op_impl/akg/gpu/mean_grad.py index e3e0121c200..ae4620305a8 100644 --- a/mindspore/ops/_op_impl/akg/gpu/mean_grad.py +++ b/mindspore/ops/_op_impl/akg/gpu/mean_grad.py @@ -13,9 +13,9 @@ # limitations under the License. 
"""SimpleMeanGrad op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -mean_grad_op_info = AkgRegOp("SimpleMeanGrad") \ +mean_grad_op_info = AkgGpuRegOp("SimpleMeanGrad") \ .fusion_type("OPAQUE") \ .input(0, "HEAD") \ .output(0, "output") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/mul.py b/mindspore/ops/_op_impl/akg/gpu/mul.py index db5b1460edf..0da7b3fb6cf 100644 --- a/mindspore/ops/_op_impl/akg/gpu/mul.py +++ b/mindspore/ops/_op_impl/akg/gpu/mul.py @@ -13,9 +13,9 @@ # limitations under the License. """Mul op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -mul_op_info = AkgRegOp("Mul") \ +mul_op_info = AkgGpuRegOp("Mul") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .input(1, "y") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/notequal.py b/mindspore/ops/_op_impl/akg/gpu/notequal.py index dc13449fc16..b9c9c55faf5 100644 --- a/mindspore/ops/_op_impl/akg/gpu/notequal.py +++ b/mindspore/ops/_op_impl/akg/gpu/notequal.py @@ -13,9 +13,9 @@ # limitations under the License. """NotEqual op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -notequal_op_info = AkgRegOp("NotEqual") \ +notequal_op_info = AkgGpuRegOp("NotEqual") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .input(1, "y") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/relu6.py b/mindspore/ops/_op_impl/akg/gpu/relu6.py index 31bfebcd8d7..33ae7f4dada 100644 --- a/mindspore/ops/_op_impl/akg/gpu/relu6.py +++ b/mindspore/ops/_op_impl/akg/gpu/relu6.py @@ -13,9 +13,9 @@ # limitations under the License. 
"""ReLU6 op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -relu_op_info = AkgRegOp("ReLU6") \ +relu_op_info = AkgGpuRegOp("ReLU6") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .output(0, "output") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/relu6_grad.py b/mindspore/ops/_op_impl/akg/gpu/relu6_grad.py index 83d93f30775..c6ed702247d 100644 --- a/mindspore/ops/_op_impl/akg/gpu/relu6_grad.py +++ b/mindspore/ops/_op_impl/akg/gpu/relu6_grad.py @@ -13,9 +13,9 @@ # limitations under the License. """ReLU6Grad op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -relu_grad_op_info = AkgRegOp("ReLU6Grad") \ +relu_grad_op_info = AkgGpuRegOp("ReLU6Grad") \ .fusion_type("OPAQUE") \ .input(0, "y_grad") \ .input(1, "x") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/squeeze.py b/mindspore/ops/_op_impl/akg/gpu/squeeze.py index cebf6ff1f34..8761b648903 100644 --- a/mindspore/ops/_op_impl/akg/gpu/squeeze.py +++ b/mindspore/ops/_op_impl/akg/gpu/squeeze.py @@ -13,9 +13,9 @@ # limitations under the License. """Squeeze op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -squeeze_op_info = AkgRegOp("Squeeze") \ +squeeze_op_info = AkgGpuRegOp("Squeeze") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .output(0, "output") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/squeeze_grad.py b/mindspore/ops/_op_impl/akg/gpu/squeeze_grad.py index 17e45a327a3..41eacbf18f2 100644 --- a/mindspore/ops/_op_impl/akg/gpu/squeeze_grad.py +++ b/mindspore/ops/_op_impl/akg/gpu/squeeze_grad.py @@ -13,9 +13,9 @@ # limitations under the License. 
"""SqueezeGrad op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -squeeze_grad_op_info = AkgRegOp("SqueezeGrad") \ +squeeze_grad_op_info = AkgGpuRegOp("SqueezeGrad") \ .fusion_type("OPAQUE") \ .input(0, "y_grad") \ .output(0, "output") \ diff --git a/mindspore/ops/_op_impl/akg/gpu/sub.py b/mindspore/ops/_op_impl/akg/gpu/sub.py index 06b92fb49ef..eaa81240674 100644 --- a/mindspore/ops/_op_impl/akg/gpu/sub.py +++ b/mindspore/ops/_op_impl/akg/gpu/sub.py @@ -13,9 +13,9 @@ # limitations under the License. """Sub op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -sub_op_info = AkgRegOp("Sub") \ +sub_op_info = AkgGpuRegOp("Sub") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .input(1, "y") \ @@ -25,6 +25,7 @@ sub_op_info = AkgRegOp("Sub") \ .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ .get_op_info() + @op_info_register(sub_op_info) def _sub_akg(): """Sub AutoDiff register""" diff --git a/mindspore/ops/_op_impl/akg/gpu/tile.py b/mindspore/ops/_op_impl/akg/gpu/tile.py index 8c9de009795..e8e634d9a19 100644 --- a/mindspore/ops/_op_impl/akg/gpu/tile.py +++ b/mindspore/ops/_op_impl/akg/gpu/tile.py @@ -13,9 +13,9 @@ # limitations under the License. 
"""Tile op""" -from mindspore.ops.op_info_register import op_info_register, AkgRegOp, DataType +from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType -tile_op_info = AkgRegOp("Tile") \ +tile_op_info = AkgGpuRegOp("Tile") \ .fusion_type("OPAQUE") \ .input(0, "x") \ .output(0, "output") \ diff --git a/mindspore/ops/_op_impl/akg/greater.py b/mindspore/ops/_op_impl/akg/greater.py deleted file mode 100644 index 941946163a0..00000000000 --- a/mindspore/ops/_op_impl/akg/greater.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Greater op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Greater", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float16", "float32", "float32" - ], - "format": [ - "DefaultFormat", "NC1HWC0", "DefaultFormat", "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16", "float16", "float32", "float32" - ], - "format": [ - "DefaultFormat", "NC1HWC0", "DefaultFormat", "NC1HWC0" - ], - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "bool", "bool", "bool", "bool" - ], - "format": [ - "DefaultFormat", "NC1HWC0", "DefaultFormat", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _greater_akg(): - """Greater AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/greater_equal.py b/mindspore/ops/_op_impl/akg/greater_equal.py deleted file mode 100644 index 11642baa864..00000000000 --- a/mindspore/ops/_op_impl/akg/greater_equal.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""GreaterEqual op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "GreaterEqual", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "bool", "bool", "bool", "bool", "bool", "bool" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _greater_equal_akg(): - """Equal AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/inplace_assign.py b/mindspore/ops/_op_impl/akg/inplace_assign.py deleted file mode 100644 index 1cc40abe9b5..00000000000 --- a/mindspore/ops/_op_impl/akg/inplace_assign.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""InplaceAssign op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "InplaceAssign", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - { - "name": "fake_output", - "param_type": "optional", - "type": "bool" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" - ], - "name": "y" - }, - { - "index": 2, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" - ], - "name": "z" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", "FracZ", "FracZ", "FracZ" - ], - "name": "output" - } - ] -}""") -def _inplace_assign_akg(): - """InplaceAssign AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/less.py b/mindspore/ops/_op_impl/akg/less.py deleted file mode 100644 index 499ed2e8fc0..00000000000 --- a/mindspore/ops/_op_impl/akg/less.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in 
compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Less op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Less", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float16" - ], - "format": [ - "DefaultFormat", "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16", "float16" - ], - "format": [ - "DefaultFormat", "NC1HWC0" - ], - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "bool", "bool" - ], - "format": [ - "DefaultFormat", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _less_akg(): - """Less AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/less_equal.py b/mindspore/ops/_op_impl/akg/less_equal.py deleted file mode 100644 index 97fbdec0906..00000000000 --- a/mindspore/ops/_op_impl/akg/less_equal.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""LessEqual op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "LessEqual", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "bool", "bool", "bool", "bool", "bool", "bool" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _less_equal_akg(): - """Equal AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/log.py b/mindspore/ops/_op_impl/akg/log.py deleted file mode 100644 index 526538d17d1..00000000000 --- a/mindspore/ops/_op_impl/akg/log.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Log op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Log", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _log_akg(): - """Log AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/matmul.py b/mindspore/ops/_op_impl/akg/matmul.py deleted file mode 100644 index 084ba754fa9..00000000000 --- a/mindspore/ops/_op_impl/akg/matmul.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""MatMul op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "MatMul", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "transpose_a", - "param_type": "optional", - "type": "bool" - }, - { - "name": "transpose_b", - "param_type": "optional", - "type": "bool" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat" - ], - "name": "x1" - }, - { - "index": 1, - "dtype": [ - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat" - ], - "name": "x2" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat" - ], - "name": "output" - } - ] -}""") -def _matmul_akg(): - """MatMul AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/max.py b/mindspore/ops/_op_impl/akg/max.py deleted file mode 100644 index 21fd4ef9c46..00000000000 --- a/mindspore/ops/_op_impl/akg/max.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Max op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Max", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - { - "name": "axis", - "param_type": "required", - "type": "listInt" - }, - { - "name": "keep_dims", - "param_type": "required", - "type": "bool" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _max_akg(): - """Max AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/max_pool_grad_with_argmax.py b/mindspore/ops/_op_impl/akg/max_pool_grad_with_argmax.py deleted file mode 100644 index 4adad3eb883..00000000000 --- a/mindspore/ops/_op_impl/akg/max_pool_grad_with_argmax.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""MaxPoolGradWithArgmax op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "MaxPoolGradWithArgmax", - "imply_type": "AutoDiff", - "fusion_type": "CONVLUTION", - "attr": [ - { - "name": "pad_mode", - "param_type": "optional", - "type": "str" - }, - { - "name": "window", - "param_type": "optional", - "type": "int" - }, - { - "name": "pad", - "param_type": "optional", - "type": "int" - }, - { - "name": "stride", - "param_type": "optional", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float16" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat" - ], - "name": "argmax" - }, - { - "index": 2, - "dtype": [ - "float16", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "grad" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32" - ], - "format": [ - "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _max_pool_grad_with_argmax_akg(): - """MaxPoolGradWithArgmax AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/max_pool_with_argmax.py b/mindspore/ops/_op_impl/akg/max_pool_with_argmax.py deleted file mode 100644 index 3ae36d47932..00000000000 --- a/mindspore/ops/_op_impl/akg/max_pool_with_argmax.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""MaxPoolWithArgmax op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "MaxPoolWithArgmax", - "imply_type": "AutoDiff", - "fusion_type": "CONVLUTION", - "attr": [ - { - "name": "pad_mode", - "param_type": "optional", - "type": "str" - }, - { - "name": "window", - "param_type": "optional", - "type": "int" - }, - { - "name": "pad", - "param_type": "optional", - "type": "int" - }, - { - "name": "stride", - "param_type": "optional", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16" - ], - "format": [ - "NC1HWC0" - ], - "name": "output" - }, - { - "index": 1, - "dtype": [ - "float16" - ], - "format": [ - "DefaultFormat" - ], - "name": "argmax" - } - ] -}""") -def _max_pool_with_argmax_akg(): - """MaxPoolWithArgmax AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/maximum.py b/mindspore/ops/_op_impl/akg/maximum.py deleted file mode 100644 index 8d8de5270ac..00000000000 --- a/mindspore/ops/_op_impl/akg/maximum.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Maximum op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Maximum", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "param_type": "required", - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "param_type": "required", - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _maximum_akg(): - """Maximum AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/mean.py b/mindspore/ops/_op_impl/akg/mean.py deleted file mode 100644 index 0b49e768653..00000000000 --- a/mindspore/ops/_op_impl/akg/mean.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""SimpleMean op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "SimpleMean", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _mean_akg(): - """SimpleMean AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/mean_grad.py b/mindspore/ops/_op_impl/akg/mean_grad.py deleted file mode 100644 index 3b8379d1f0e..00000000000 --- a/mindspore/ops/_op_impl/akg/mean_grad.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""SimpleMeanGrad op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "SimpleMeanGrad", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - { - "name": "input_shape", - "param_type": "required", - "type": "listInt" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "HEAD" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _mean_grad_akg(): - """SimpleMeanGrad AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/minimum.py b/mindspore/ops/_op_impl/akg/minimum.py deleted file mode 100644 index 759df2085fc..00000000000 --- a/mindspore/ops/_op_impl/akg/minimum.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Minimum op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Minimum", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32", - "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32", - "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32", - "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _minimum_akg(): - """Minimum AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/mul.py b/mindspore/ops/_op_impl/akg/mul.py deleted file mode 100644 index ab02c2d89e9..00000000000 --- a/mindspore/ops/_op_impl/akg/mul.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Mul op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Mul", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - { - "name": "x_shape", - "param_type": "required", - "type": "listInt" - }, - { - "name": "y_shape", - "param_type": "required", - "type": "listInt" - }, - { - "name": "data_format", - "param_type": "required", - "type": "listStr" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "FracZ", "FracZ", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16", "float32", "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "FracZ", "FracZ", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "FracZ", "FracZ", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _mul_akg(): - """Mul AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/neg.py b/mindspore/ops/_op_impl/akg/neg.py deleted file 
mode 100644 index bc00d60271e..00000000000 --- a/mindspore/ops/_op_impl/akg/neg.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Neg op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Neg", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32", - "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32", - "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _neg_akg(): - """Neg AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/one_hot.py b/mindspore/ops/_op_impl/akg/one_hot.py deleted file mode 100644 index c5034dbbd4b..00000000000 --- a/mindspore/ops/_op_impl/akg/one_hot.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# 
-# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""OneHot op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "OneHot", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "depth", - "param_type": "required", - "type": "int" - }, - { - "name": "axis", - "param_type": "required", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "int32", "int32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat" - ], - "name": "indices" - }, - { - "index": 1, - "dtype": [ - "int32", "float32", "float16" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat" - ], - "name": "on_value" - }, - { - "index": 2, - "dtype": [ - "int32", "float32", "float16" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat" - ], - "name": "off_value" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "int32", "float32", "float16" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat" - ], - "name": "output" - } - ] -}""") -def _one_hot_akg(): - """OneHot AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/pow.py b/mindspore/ops/_op_impl/akg/pow.py deleted file mode 100644 index d782968c052..00000000000 --- a/mindspore/ops/_op_impl/akg/pow.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# 
Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Pow op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Pow", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "int32", "float16", "int32", "float32", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" - ], - "param_type": "required", - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16", "int32", "float16", "int32", "float32", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" - ], - "param_type": "required", - "name": "power" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "int32", "float16", "int32", "float32", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _power_akg(): - """Pow AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/real_div.py b/mindspore/ops/_op_impl/akg/real_div.py deleted file mode 100644 index 9fa37a24e33..00000000000 --- a/mindspore/ops/_op_impl/akg/real_div.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the 
"License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""RealDiv op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "RealDiv", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "x" - }, - { - "index": 1, - "dtype": [ - "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _real_div_akg(): - """RealDiv AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/reciprocal.py b/mindspore/ops/_op_impl/akg/reciprocal.py deleted file mode 100644 index 9fd7cc40b42..00000000000 --- a/mindspore/ops/_op_impl/akg/reciprocal.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you 
may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Reciprocal op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Reciprocal", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _reciprocal_akg(): - """Reciprocal AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/reduce_max.py b/mindspore/ops/_op_impl/akg/reduce_max.py deleted file mode 100644 index b9db8ea83af..00000000000 --- a/mindspore/ops/_op_impl/akg/reduce_max.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""ReduceMax op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "ReduceMax", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - { - "name": "axis", - "param_type": "required", - "type": "listInt" - }, - { - "name": "keep_dims", - "param_type": "required", - "type": "bool" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float16" - ], - "format": [ - "DefaultFormat", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float16" - ], - "format": [ - "DefaultFormat", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _reduce_max_akg(): - """ReduceMax AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/reduce_mean.py b/mindspore/ops/_op_impl/akg/reduce_mean.py deleted file mode 100644 index 0a4ffdf2216..00000000000 --- a/mindspore/ops/_op_impl/akg/reduce_mean.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""ReduceMean op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "ReduceMean", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - { - "name": "axis", - "param_type": "required", - "type": "listInt" - }, - { - "name": "keep_dims", - "param_type": "required", - "type": "bool" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _reduce_mean_akg(): - """ReduceMean AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/reduce_sum.py b/mindspore/ops/_op_impl/akg/reduce_sum.py deleted file mode 100644 index 20d091ac76c..00000000000 --- a/mindspore/ops/_op_impl/akg/reduce_sum.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""ReduceSum op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "ReduceSum", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - { - "name": "axis", - "param_type": "required", - "type": "listInt" - }, - { - "name": "keep_dims", - "param_type": "required", - "type": "bool" - }, - { - "name": "atomic_add", - "param_type": "optional", - "type": "str" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _reduce_sum_akg(): - """ReduceSum AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/relu.py b/mindspore/ops/_op_impl/akg/relu.py deleted file mode 100644 index b32725f8859..00000000000 --- a/mindspore/ops/_op_impl/akg/relu.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""ReLU op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "ReLU", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _relu_akg(): - """ReLU AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/relu_grad.py b/mindspore/ops/_op_impl/akg/relu_grad.py deleted file mode 100644 index c785b750fe1..00000000000 --- a/mindspore/ops/_op_impl/akg/relu_grad.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""ReluGrad op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "ReluGrad", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0" - ], - "name": "y_backprop" - }, - { - "index": 1, - "dtype": [ - "float16", "float32", "float16" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _relu_grad_akg(): - """ReluGrad AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/reshape.py b/mindspore/ops/_op_impl/akg/reshape.py deleted file mode 100644 index d200b66fa2e..00000000000 --- a/mindspore/ops/_op_impl/akg/reshape.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Reshape op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Reshape", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "shape", - "param_type": "required", - "type": "listInt" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "tensor" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _reshape_akg(): - """Reshape AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/round.py b/mindspore/ops/_op_impl/akg/round.py deleted file mode 100644 index 0625c3ceda7..00000000000 --- a/mindspore/ops/_op_impl/akg/round.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Round op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Round", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _round_akg(): - """Round AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/rsqrt.py b/mindspore/ops/_op_impl/akg/rsqrt.py deleted file mode 100644 index 9264864f914..00000000000 --- a/mindspore/ops/_op_impl/akg/rsqrt.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Rsqrt op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Rsqrt", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "param_type": "required", - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _rsqrt_akg(): - """Rsqrt AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/select.py b/mindspore/ops/_op_impl/akg/select.py deleted file mode 100644 index 006c6a5444e..00000000000 --- a/mindspore/ops/_op_impl/akg/select.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Select op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Select", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "bool", "bool", "bool", "bool", "bool", "bool" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" - ], - "param_type": "required", - "name": "condition" - }, - { - "index": 1, - "dtype": [ - "float16", "int32", "float16", "int32", "float32", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" - ], - "param_type": "required", - "name": "x" - }, - { - "index": 2, - "dtype": [ - "float16", "int32", "float16", "int32", "float32", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" - ], - "param_type": "required", - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "int32", "float16", "int32", "float32", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "DefaultFormat", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _select_akg(): - """Select AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/softmax.py b/mindspore/ops/_op_impl/akg/softmax.py deleted file mode 100644 index a41c2aef368..00000000000 --- a/mindspore/ops/_op_impl/akg/softmax.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Softmax op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Softmax", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - { - "name": "axis", - "param_type": "required", - "type": "listInt" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _softmax_akg(): - """Softmax AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/sparse_softmax_cross_entropy_with_logits.py b/mindspore/ops/_op_impl/akg/sparse_softmax_cross_entropy_with_logits.py deleted file mode 100644 index e9e828f312e..00000000000 --- a/mindspore/ops/_op_impl/akg/sparse_softmax_cross_entropy_with_logits.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""SparseSoftmaxCrossEntropyWithLogits op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "SparseSoftmaxCrossEntropyWithLogits", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "is_grad", - "param_type": "optional", - "type": "bool" - }, - { - "name": "sens", - "param_type": "optional", - "type": "float" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "DefaultFormat" - ], - "name": "features" - }, - { - "index": 1, - "dtype": [ - "int32" - ], - "format": [ - "DefaultFormat" - ], - "name": "labels" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float32" - ], - "format": [ - "DefaultFormat" - ], - "name": "output" - } - ] -}""") -def _sparse_softmax_cross_entropy_with_logits_akg(): - """SparseSoftmaxCrossEntropyWithLogits AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/sqrt.py b/mindspore/ops/_op_impl/akg/sqrt.py deleted file mode 100644 index fcaa84b3d41..00000000000 --- a/mindspore/ops/_op_impl/akg/sqrt.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Sqrt op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Sqrt", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "param_type": "required", - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _sqrt_akg(): - """Sqrt AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/strided_slice.py b/mindspore/ops/_op_impl/akg/strided_slice.py deleted file mode 100644 index bdbd8dfc2f1..00000000000 --- a/mindspore/ops/_op_impl/akg/strided_slice.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""StridedSlice op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "StridedSlice", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "begin", - "param_type": "required", - "type": "listInt" - }, - { - "name": "end", - "param_type": "required", - "type": "listInt" - }, - { - "name": "strides", - "param_type": "required", - "type": "listInt" - }, - { - "name": "begin_mask", - "param_type": "required", - "type": "int" - }, - { - "name": "end_mask", - "param_type": "required", - "type": "int" - }, - { - "name": "ellipsis_mask", - "param_type": "required", - "type": "int" - }, - { - "name": "new_axis_mask", - "param_type": "required", - "type": "int" - }, - { - "name": "shrink_axis_mask", - "param_type": "required", - "type": "int" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _strided_slice_akg(): - """StridedSlice AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/sub.py b/mindspore/ops/_op_impl/akg/sub.py deleted file 
mode 100644 index 846aa280bb0..00000000000 --- a/mindspore/ops/_op_impl/akg/sub.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Sub op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Sub", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32", - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", - "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "x" - }, - { - "index": 1, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32", - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", - "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "y" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "int32", "float16", "float32", "int32", "float16", "float32", - "int32", "float16", "float32", "int32", "float16", "float32" - ], - "format": [ - "DefaultFormat", 
"DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0", - "FracZ", "FracZ", "FracZ", "FRACTAL_NZ", "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _sub_akg(): - """Sub AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/sum.py b/mindspore/ops/_op_impl/akg/sum.py deleted file mode 100644 index 501b387b250..00000000000 --- a/mindspore/ops/_op_impl/akg/sum.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Sum op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Sum", - "imply_type": "AutoDiff", - "fusion_type": "COMMREDUCE", - "attr": [ - { - "name": "axis", - "param_type": "required", - "type": "listInt" - }, - { - "name": "keepdims", - "param_type": "required", - "type": "bool" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "param_type": "required", - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32", - "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", - "FRACTAL_NZ", "FRACTAL_NZ" - ], - "name": "output" - } - ] -}""") -def _sum_akg(): - """Sum AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/tile.py b/mindspore/ops/_op_impl/akg/tile.py deleted file mode 100644 index bd13978fe73..00000000000 --- a/mindspore/ops/_op_impl/akg/tile.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Tile op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "Tile", - "imply_type": "AutoDiff", - "fusion_type": "OPAQUE", - "attr": [ - { - "name": "multiples", - "param_type": "required", - "type": "listInt" - } - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "int32", "float16", "float32", "int32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _tile_akg(): - """Tile AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/akg/zeros_like.py b/mindspore/ops/_op_impl/akg/zeros_like.py deleted file mode 100644 index a02ece22d71..00000000000 --- a/mindspore/ops/_op_impl/akg/zeros_like.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""ZerosLike op""" -from mindspore.ops.op_info_register import op_info_register - - -@op_info_register("""{ - "op_name": "ZerosLike", - "imply_type": "AutoDiff", - "fusion_type": "ELEMWISE", - "attr": [ - - ], - "inputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "x" - } - ], - "outputs": [ - { - "index": 0, - "dtype": [ - "float16", "float32", "float16", "float32" - ], - "format": [ - "DefaultFormat", "DefaultFormat", "NC1HWC0", "NC1HWC0" - ], - "name": "output" - } - ] -}""") -def _zeros_like_akg(): - """ZerosLike AutoDiff register""" - return diff --git a/mindspore/ops/_op_impl/tbe/__init__.py b/mindspore/ops/_op_impl/tbe/__init__.py index 8009280ab81..317509b5a90 100644 --- a/mindspore/ops/_op_impl/tbe/__init__.py +++ b/mindspore/ops/_op_impl/tbe/__init__.py @@ -133,6 +133,7 @@ from .sparse_apply_proximal_adagrad import _sparse_apply_proximal_adagrad from .apply_proximal_adagrad import _apply_proximal_adagrad from .transpose_d import _transpose_d_tbe from .unsorted_segment_sum import _unsorted_segment_sum_tbe +from .unsorted_segment_prod import _unsorted_segment_prod_tbe from .logsoftmax_grad import _logsoftmax_grad_tbe from .logsoftmax import _logsoftmax_tbe from .select import _select_tbe @@ -285,3 +286,5 @@ from .mod import _mod_tbe from .max_pool_grad_grad import _max_pool_grad_grad_tbe from .max_pool_grad_grad_with_argmax import _max_pool_grad_grad_with_argmax_tbe from .tensor_move import _tensor_move_tbe +from .population_count import _population_count_tbe +from .parallel_concat import _parallel_concat_tbe diff --git a/mindspore/ops/_op_impl/tbe/parallel_concat.py b/mindspore/ops/_op_impl/tbe/parallel_concat.py new file mode 100644 index 00000000000..46d8736fab8 --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/parallel_concat.py @@ -0,0 +1,80 @@ +# 
Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""ParallelConcat op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +parallel_concat_op_info = TBERegOp("ParallelConcat") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("parallel_concat.so") \ + .compute_cost(10) \ + .kernel_name("parallel_concat") \ + .partial_flag(True) \ + .attr("shape", "required", "listInt", "all") \ + .attr("N", "required", "int", "all") \ + .input(0, "values", False, "dynamic", "all") \ + .output(0, "output_data", False, "required", "all") \ + .dtype_format(DataType.BOOL_Default, DataType.BOOL_Default) \ + .dtype_format(DataType.BOOL_5HD, DataType.BOOL_5HD) \ + .dtype_format(DataType.I8_Default, DataType.I8_Default) \ + .dtype_format(DataType.I8_5HD, DataType.I8_5HD) \ + .dtype_format(DataType.U8_Default, DataType.U8_Default) \ + .dtype_format(DataType.U8_5HD, DataType.U8_5HD) \ + .dtype_format(DataType.I16_Default, DataType.I16_Default) \ + .dtype_format(DataType.I16_5HD, DataType.I16_5HD) \ + .dtype_format(DataType.U16_Default, DataType.U16_Default) \ + .dtype_format(DataType.U16_5HD, DataType.U16_5HD) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default) \ + .dtype_format(DataType.I32_5HD, DataType.I32_5HD) \ + .dtype_format(DataType.U32_Default, DataType.U32_Default) \ + .dtype_format(DataType.U32_5HD, 
DataType.U32_5HD) \ + .dtype_format(DataType.I64_Default, DataType.I64_Default) \ + .dtype_format(DataType.I64_5HD, DataType.I64_5HD) \ + .dtype_format(DataType.U64_Default, DataType.U64_Default) \ + .dtype_format(DataType.U64_5HD, DataType.U64_5HD) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.F32_5HD, DataType.F32_5HD) \ + .dtype_format(DataType.BOOL_NHWC, DataType.BOOL_NHWC) \ + .dtype_format(DataType.BOOL_NCHW, DataType.BOOL_NCHW) \ + .dtype_format(DataType.I8_NHWC, DataType.I8_NHWC) \ + .dtype_format(DataType.I8_NCHW, DataType.I8_NCHW) \ + .dtype_format(DataType.U8_NHWC, DataType.U8_NHWC) \ + .dtype_format(DataType.U8_NCHW, DataType.U8_NCHW) \ + .dtype_format(DataType.I16_NHWC, DataType.I16_NHWC) \ + .dtype_format(DataType.I16_NCHW, DataType.I16_NCHW) \ + .dtype_format(DataType.U16_NHWC, DataType.U16_NHWC) \ + .dtype_format(DataType.U16_NCHW, DataType.U16_NCHW) \ + .dtype_format(DataType.I32_NHWC, DataType.I32_NHWC) \ + .dtype_format(DataType.I32_NCHW, DataType.I32_NCHW) \ + .dtype_format(DataType.U32_NHWC, DataType.U32_NHWC) \ + .dtype_format(DataType.U32_NCHW, DataType.U32_NCHW) \ + .dtype_format(DataType.I64_NHWC, DataType.I64_NHWC) \ + .dtype_format(DataType.I64_NCHW, DataType.I64_NCHW) \ + .dtype_format(DataType.U64_NHWC, DataType.U64_NHWC) \ + .dtype_format(DataType.U64_NCHW, DataType.U64_NCHW) \ + .dtype_format(DataType.F16_NHWC, DataType.F16_NHWC) \ + .dtype_format(DataType.F16_NCHW, DataType.F16_NCHW) \ + .dtype_format(DataType.F32_NHWC, DataType.F32_NHWC) \ + .dtype_format(DataType.F32_NCHW, DataType.F32_NCHW) \ + .get_op_info() + + +@op_info_register(parallel_concat_op_info) +def _parallel_concat_tbe(): + """ParallelConcat TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/population_count.py b/mindspore/ops/_op_impl/tbe/population_count.py new file mode 100644 index 
00000000000..14feded367e --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/population_count.py @@ -0,0 +1,38 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""PopulationCount op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +population_count_op_info = TBERegOp("PopulationCount") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("population_count.so") \ + .compute_cost(10) \ + .kernel_name("population_count") \ + .partial_flag(True) \ + .input(0, "x", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.I16_5HD, DataType.U8_5HD) \ + .dtype_format(DataType.I16_Default, DataType.U8_Default) \ + .dtype_format(DataType.U16_5HD, DataType.U8_5HD) \ + .dtype_format(DataType.U16_Default, DataType.U8_Default) \ + .get_op_info() + + +@op_info_register(population_count_op_info) +def _population_count_tbe(): + """PopulationCount TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/roi_align.py b/mindspore/ops/_op_impl/tbe/roi_align.py index bc4eed80ce5..d392651217b 100644 --- a/mindspore/ops/_op_impl/tbe/roi_align.py +++ b/mindspore/ops/_op_impl/tbe/roi_align.py @@ -27,7 +27,7 @@ roi_align_op_info = TBERegOp("ROIAlign") \ .attr("pooled_height", "required", "int", "all") \ .attr("pooled_width", "required", "int", "all") \ 
.attr("sample_num", "optional", "int", "all", "2") \ - .attr("roi_end_mode", "optional", "0,1", "1") \ + .attr("roi_end_mode", "optional", "int", "0,1", "1") \ .input(0, "features", False, "required", "all") \ .input(1, "rois", False, "required", "all") \ .input(2, "rois_n", False, "optional", "all") \ diff --git a/mindspore/ops/_op_impl/tbe/unsorted_segment_prod.py b/mindspore/ops/_op_impl/tbe/unsorted_segment_prod.py new file mode 100644 index 00000000000..40b04d17c3c --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/unsorted_segment_prod.py @@ -0,0 +1,48 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""UnsortedSegmentProdD op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +unsorted_segment_prod_d_op_info = TBERegOp("UnsortedSegmentProd") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("unsorted_segment_prod_d.so") \ + .compute_cost(10) \ + .kernel_name("unsorted_segment_prod_d") \ + .partial_flag(True) \ + .attr("num_segments", "required", "int", "all") \ + .input(0, "data", False, "required", "all") \ + .input(1, "segment_ids", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.F16_5HD, DataType.I32_Default, DataType.F16_5HD) \ + .dtype_format(DataType.F16_FracZ, DataType.I32_Default, DataType.F16_FracZ) \ + .dtype_format(DataType.F16_C1HWNCoC0, DataType.I32_Default, DataType.F16_C1HWNCoC0) \ + .dtype_format(DataType.F16_Default, DataType.I32_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_5HD, DataType.I32_Default, DataType.F32_5HD) \ + .dtype_format(DataType.F32_FracZ, DataType.I32_Default, DataType.F32_FracZ) \ + .dtype_format(DataType.F32_C1HWNCoC0, DataType.I32_Default, DataType.F32_C1HWNCoC0) \ + .dtype_format(DataType.F32_Default, DataType.I32_Default, DataType.F32_Default) \ + .dtype_format(DataType.I32_5HD, DataType.I32_Default, DataType.I32_5HD) \ + .dtype_format(DataType.I32_FracZ, DataType.I32_Default, DataType.I32_FracZ) \ + .dtype_format(DataType.I32_C1HWNCoC0, DataType.I32_Default, DataType.I32_C1HWNCoC0) \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ + .get_op_info() + + +@op_info_register(unsorted_segment_prod_d_op_info) +def _unsorted_segment_prod_tbe(): + """UnsortedSegmentProdD TBE register""" + return diff --git a/mindspore/ops/composite/base.py b/mindspore/ops/composite/base.py index b0f16d82bf3..0f28d9572fd 100644 --- a/mindspore/ops/composite/base.py +++ b/mindspore/ops/composite/base.py @@ -17,6 
+17,7 @@ """Basic composite operations.""" from functools import partial +from types import FunctionType from mindspore import context from ..._c_expression import EnvInstance_, GradOperation_, HyperMap_, Map_, MultitypeFuncGraph_, Tail_, \ @@ -25,6 +26,7 @@ from ...common import dtype as mstype from ...common.api import ms_function, _pynative_exec, _wrap_func from .. import functional as F from ...common.parameter import Parameter +from ...common.tensor import Tensor __all__ = [EnvInstance_, TupleAdd_, TupleSlice_, UnpackCall_, TupleGetItemTensor_] @@ -114,37 +116,48 @@ class GradOperation(GradOperation_): self.fn = None self.need_forward = False + def _pynative_forward_run(self, args, fn): + """ Pynative forward run to build grad graph. """ + if self.sens_param: + args = args[:-1] + for arg in args: + if not isinstance(arg, Tensor): + raise TypeError("grad inputs should be tensor in pynative mode") + if isinstance(fn, FunctionType): + _pynative_exec.set_grad_flag(True) + _pynative_exec.new_graph(fn, *args) + output = fn(*args) + _pynative_exec.end_graph(fn, output, *args) + else: + if fn.is_run and not fn.requires_grad: + raise ValueError("obj must set_grad.") + if not fn.is_run: + self.need_forward = True + print("already has forward run before grad by user") + if self.need_forward: + fn.set_grad() + fn(*args) + def __call__(self, fn, weights=None): grad_ = GradOperation('grad', self.get_all, self.get_by_list, self.sens_param) if self.grad_fn is None or self.fn != fn: - if self.get_by_list: - if context.get_context("mode") == context.GRAPH_MODE: + if context.get_context("mode") == context.GRAPH_MODE: + if self.get_by_list: @ms_function(obj=fn) def after_grad(*args): return grad_(fn, weights)(*args) else: - @_wrap_func + @ms_function(obj=fn) def after_grad(*args): - if fn.is_run and not fn.requires_grad: - raise ValueError("obj must set_grad.") - if not fn.is_run: - self.need_forward = True - print("already has forward run before grad by user") - if 
self.need_forward: - fn.set_grad() - if self.sens_param: - f_args = args[:-1] - fn(*f_args) - else: - fn(*args) - _pynative_exec.grad(grad_, fn, weights, *args) - out = _pynative_exec(*args) - _pynative_exec.clear() - return out + return grad_(fn)(*args) else: - @ms_function(obj=fn) + @_wrap_func def after_grad(*args): - return grad_(fn)(*args) + self._pynative_forward_run(args, fn) + _pynative_exec.grad(grad_, fn, weights, *args) + out = _pynative_exec(*args) + _pynative_exec.clear() + return out self.grad_fn = after_grad self.fn = fn return self.grad_fn diff --git a/mindspore/ops/functional.py b/mindspore/ops/functional.py index a5c3165ab10..2be011cb773 100644 --- a/mindspore/ops/functional.py +++ b/mindspore/ops/functional.py @@ -158,7 +158,6 @@ make_indexed_slices = Primitive('MakeIndexedSlices') indexed_slices_get_values = Primitive('IndexedSlicesGetValues') indexed_slices_get_indices = Primitive('IndexedSlicesGetIndices') indexed_slices_get_dense_shape = Primitive('IndexedSlicesGetDenseShape') -is_indexed_slices = Primitive('IsIndexedSlices') tensor_operator_registry.register('__add__', tensor_add) @@ -166,6 +165,7 @@ tensor_operator_registry.register('__sub__', tensor_sub) tensor_operator_registry.register('__mul__', tensor_mul) tensor_operator_registry.register('__truediv__', tensor_div) tensor_operator_registry.register('__mod__', tensor_mod) +tensor_operator_registry.register('__pow__', tensor_pow) tensor_operator_registry.register('__floordiv__', tensor_floordiv) #ms cannot support Tensor(True) compare tensor_operator_registry.register('__eq__', equal) diff --git a/mindspore/ops/op_info_register.py b/mindspore/ops/op_info_register.py index a7a60b7181c..6ab915e3699 100644 --- a/mindspore/ops/op_info_register.py +++ b/mindspore/ops/op_info_register.py @@ -215,10 +215,10 @@ class RegOp: class AkgRegOp(RegOp): """Class for Akg op info register.""" - def __init__(self, op_name): + def __init__(self, op_name, processor): super(AkgRegOp, self).__init__(op_name) 
- self.imply_type = "AutoDiff" - self.processor = "cuda" + self.imply_type = "AKG" + self.processor = processor def input(self, index=None, name=None, **kwargs): """ @@ -270,6 +270,16 @@ class AkgRegOp(RegOp): return self +class AkgGpuRegOp(AkgRegOp): + def __init__(self, op_name): + super(AkgGpuRegOp, self).__init__(op_name, "CUDA") + + +class AkgAscendRegOp(AkgRegOp): + def __init__(self, op_name): + super(AkgAscendRegOp, self).__init__(op_name, "AiCore") + + class AiCPURegOp(RegOp): """Class for AiCPU op info register""" diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py index 88064875797..1602f2594d3 100644 --- a/mindspore/ops/operations/__init__.py +++ b/mindspore/ops/operations/__init__.py @@ -27,11 +27,11 @@ from .array_ops import (Argmax, Argmin, Cast, Concat, Pack, Unpack, Rank, Reshape, ResizeNearestNeighbor, ArgMinWithValue, SameTypeShape, ScatterAdd, ScatterSub, ScatterMul, ScatterDiv, ScatterMax, ScatterMin, ScatterUpdate, ScalarToArray, ScalarToTensor, ScatterNd, ScatterNdUpdate, Select, - Shape, Size, Slice, Split, TransShape, EmbeddingLookup, + Shape, Size, Slice, Split, TransShape, ParallelConcat, Squeeze, StridedSlice, Tile, TensorScatterUpdate, - Transpose, TruncatedNormal, TupleToArray, UnsortedSegmentMin, + Transpose, TruncatedNormal, TupleToArray, UnsortedSegmentMin, UnsortedSegmentProd, UnsortedSegmentSum, SpaceToDepth, DepthToSpace, SpaceToBatch, BatchToSpace, - SpaceToBatchND, BatchToSpaceND, BroadcastTo, InplaceUpdate, ReverseSequence) + SpaceToBatchND, BatchToSpaceND, BroadcastTo, InplaceUpdate, ReverseSequence, EmbeddingLookup) from .comm_ops import (AllGather, AllReduce, _AlltoAll, ReduceScatter, Broadcast, _MirrorOperator, ReduceOp, _VirtualDataset, _VirtualDiv, _GetTensorSlice, @@ -62,7 +62,7 @@ from .nn_ops import (LSTM, SGD, Adam, SparseApplyAdam, SparseApplyLazyAdam, Appl DropoutDoMask, DropoutGrad, Dropout, DropoutGenMask, Flatten, FusedBatchNorm, BNTrainingReduce, BNTrainingUpdate, Gelu, Elu, 
- GetNext, L2Normalize, LayerNorm, L2Loss, CTCLoss, + GetNext, L2Normalize, LayerNorm, L2Loss, CTCLoss, CTCLossV2, LogSoftmax, MaxPool, DataFormatDimMap, AvgPool, Conv2DBackpropInput, ConfusionMulGrad, @@ -77,10 +77,10 @@ from .nn_ops import (LSTM, SGD, Adam, SparseApplyAdam, SparseApplyLazyAdam, Appl ApplyAdaMax, ApplyAdadelta, ApplyAdagrad, ApplyAdagradV2, ApplyAddSign, ApplyPowerSign, ApplyGradientDescent, ApplyProximalGradientDescent, ApplyRMSProp, ApplyCenteredRMSProp, BasicLSTMCell, InTopK) -from .other_ops import (Assign, IOU, BoundingBoxDecode, BoundingBoxEncode, - CheckValid, MakeRefKey, Partial, Depend, CheckBprop) from . import _quant_ops from ._quant_ops import * +from .other_ops import (Assign, IOU, BoundingBoxDecode, BoundingBoxEncode, PopulationCount, + CheckValid, MakeRefKey, Partial, Depend, CheckBprop, Push, Pull) from .thor_ops import * __all__ = [ @@ -260,6 +260,7 @@ __all__ = [ 'DepthwiseConv2dNative', 'UnsortedSegmentSum', 'UnsortedSegmentMin', + 'UnsortedSegmentProd', "AllGather", "AllReduce", "ReduceScatter", @@ -341,7 +342,12 @@ __all__ = [ "InTopK", "CropAndResize", "LRN", - "Mod" + "Mod", + "PopulationCount", + "ParallelConcat", + "EmbeddingLookup", + "Push", + "Pull" ] __all__.sort() diff --git a/mindspore/ops/operations/_inner_ops.py b/mindspore/ops/operations/_inner_ops.py index be7e901757e..2d17da00282 100644 --- a/mindspore/ops/operations/_inner_ops.py +++ b/mindspore/ops/operations/_inner_ops.py @@ -394,76 +394,6 @@ class AscendDequant(PrimitiveWithInfer): return mstype.float16 -class EmbeddingLookup(PrimitiveWithInfer): - """ - Returns a slice of input tensor based on the specified indices. - - This Primitive has the similar functionality as GatherV2 operating on `axis = 0`, but has three more inputs: - `offset`, `reduce_scatter_flag` and `split_num`. This primitive runs on the host instead of devices. - - Inputs: - - **input_params** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. 
- The Tensor slice, instead of the entire Tensor. - - **input_indices** (Tensor) - The shape of tensor is :math:`(y_1, y_2, ..., y_S)`. - Specifies the indices of elements of the original Tensor. Values can be out of range of `input_params`, - and the exceeding part will be filled with 0 in the output. - - **offset** (int) - Specifies the offset value of this `input_params` slice. Thus the real indices - are equal to `input_indices` minus `offset`. - - **reduce_scatter_flag** (bool) - Specifies whether perform reduce_scatter on host or not. - Only constant value is allowed. - - **split_num** (int) - Specifies the number of partitions of the reduce_scatter produces. This variable - is used only if `reduce_scatter_flag` is True. Only constant value is allowed. - - - Outputs: - Tensor, the shape of tensor is :math:`(z_1, z_2, ..., z_N)`. - - Examples: - >>> input_params = Tensor(np.array([[8, 9], [10, 11], [12, 13], [14, 15]]), mindspore.float32) - >>> input_indices = Tensor(np.array([[5, 2], [8, 5]]), mindspore.int32) - >>> offset = 4 - >>> reduce_scatter_flag = False - >>> split_num = 1 - >>> out = P.EmbeddingLookup()(input_params, input_indices, offset, reduce_scatter_flag, split_num) - [[[10, 11], [0 ,0]], [[0, 0], [10, 11]]] - """ - @prim_attr_register - def __init__(self): - """init index_select""" - self.__setattr_flag__ = True - self.init_prim_io_names(inputs=['params', 'indices', 'offset', 'reduce_scatter_flag', 'split_num'], - outputs=['output']) - self.add_prim_attr('primitive_target', 'CPU') - - def __infer__(self, params, indices, offset, reduce_scatter_flag=False, split_num=2): - validator.check_subclass("params", params['dtype'], mstype.tensor, self.name) - validator.check_tensor_type_same({"indices": indices['dtype']}, mstype.int_type, self.name) - validator.check_subclass("offset", offset['dtype'], mstype.int_, self.name) - validator.check_subclass("split_num", split_num['dtype'], mstype.int_, self.name) - if split_num['value'] < 1: - raise 
ValueError("The parameter 'split_num' must be positive, but got %d." % split_num) - params_shp = params['shape'] - out_shape = indices['shape'] + params_shp[1:] - if reduce_scatter_flag is None: - raise ValueError("The value of 'reduce_scatter_flag' is None.") - reduce_scatter_flag_value = reduce_scatter_flag['value'] - if split_num is None: - raise ValueError("The value of 'split_num_value' is None.") - split_num_value = split_num['value'] - if reduce_scatter_flag_value is True: - # Partition the tensor along the dimension 0. The shape size of dimension 0 should be divisible by - # (split_num * 8) - if out_shape[0] % (split_num_value * 8) != 0: - raise ValueError("The dimension 0 of the shape: %d, is not divisible by: %d." % - (out_shape[0], (split_num_value * 8))) - # After 'Concat' on host, the shape size of dimension 0 is: out_shape[0] // 8 - out_shape[0] = out_shape[0] // 8 - out = {'shape': out_shape, - 'dtype': params['dtype'], - 'value': None} - return out - - class SparseApplyFtrlNoReturn(PrimitiveWithInfer): """ Update relevant entries according to the FTRL-proximal scheme. @@ -747,7 +677,7 @@ class MatrixDiagPart(PrimitiveWithInfer): Tensor, data type same as input `x`. The shape should be x.shape[:-2] + [min(x.shape[-2:])]. Examples: - >>> x = Tensor([[[-1, 0], [0, 1]], [-1, 0], [0, 1]], [[-1, 0], [0, 1]]], mindspore.float32) + >>> x = Tensor([[[-1, 0], [0, 1]], [[-1, 0], [0, 1]], [[-1, 0], [0, 1]]], mindspore.float32) >>> assist = Tensor(np.arange(-12, 0).reshape(3, 2, 2), mindspore.float32) >>> matrix_diag_part = P.MatrixDiagPart() >>> result = matrix_diag_part(x, assist) @@ -789,11 +719,11 @@ class MatrixSetDiag(PrimitiveWithInfer): Tensor, data type same as input `x`. The shape same as `x`. 
Examples: - >>> x = Tensor([[[-1, 0], [0, 1]], [-1, 0], [0, 1]], [[-1, 0], [0, 1]]], mindspore.float32) + >>> x = Tensor([[[-1, 0], [0, 1]], [[-1, 0], [0, 1]], [[-1, 0], [0, 1]]], mindspore.float32) >>> diagonal = Tensor([[-1., 2.], [-1., 1.], [-1., 1.]], mindspore.float32) >>> matrix_set_diag = P.MatrixSetDiag() >>> result = matrix_set_diag(x, diagonal) - [[[-1, 0], [0, 2]], [-1, 0], [0, 1]], [[-1, 0], [0, 1]]] + [[[-1, 0], [0, 2]], [[-1, 0], [0, 1]], [[-1, 0], [0, 1]]] """ @@ -812,10 +742,10 @@ class MatrixSetDiag(PrimitiveWithInfer): validator.check("x shape", x_shape, "assist shape", assist_shape, Rel.EQ, self.name) if x_shape[-2] < x_shape[-1]: - validator.check("x shape excluding the last dimension", x_shape[:-1], "diagnoal shape", - diagonal_shape, Rel.EQ, self.name) + validator.check("diagnoal shape", diagonal_shape, "x shape excluding the last dimension", + x_shape[:-1], Rel.EQ, self.name) else: - validator.check("x shape excluding the second to last dimension", x_shape[:-2]+x_shape[-1:], - "diagonal shape", diagonal_shape, Rel.EQ, self.name) + validator.check("diagonal shape", diagonal_shape, "x shape excluding the second last dimension", + x_shape[:-2] + x_shape[-1:], Rel.EQ, self.name) return assist_shape diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py index 7b7e8b2b641..1e28a56db1f 100644 --- a/mindspore/ops/operations/array_ops.py +++ b/mindspore/ops/operations/array_ops.py @@ -601,51 +601,6 @@ class SparseGatherV2(GatherV2): >>> out = P.SparseGatherV2()(input_params, input_indices, axis) """ -class EmbeddingLookup(PrimitiveWithInfer): - """ - Returns a slice of input tensor based on the specified indices and axis. This Primitive has the similar - functionality as GatherV2, but has one more inputs: `offset`. - This primitive runs on the acipu devices. - - Inputs: - - **params** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. - The Tensor slice, instead of the entire Tensor. 
- - **indices** (Tensor) - The shape of tensor is :math:`(y_1, y_2, ..., y_S)`. - Specifies the indices of elements of the original Tensor. Values can be out of range of `params`, - and the exceeding part will be filled with 0 in the output. - The indices to do lookup operation whose data type should be mindspore.int32 or mindspore.int64. - - **offset** (int) - Specifies the offset value of this `params` slice. Thus the real indices - are equal to `indices` minus `offset`. - - - Outputs: - Tensor, the shape of tensor is :math:`(z_1, z_2, ..., z_N)`. - - Examples: - >>> params = Tensor(np.array([[8, 9], [10, 11], [12, 13], [14, 15]]), mindspore.float32) - >>> indices = Tensor(np.array([[5, 2], [8, 5]]), mindspore.int32) - >>> offset = 4 - >>> out = P.EmbeddingLookup()(params, indices, offset) - [[[10, 11], [0 ,0]], [[0, 0], [10, 11]]] - """ - @prim_attr_register - def __init__(self): - """init index_select""" - self.init_prim_io_names(inputs=['params', 'indices', 'offset'], - outputs=['output']) - - def __infer__(self, params, indices, offset): - validator.check_subclass("params", params['dtype'], mstype.tensor, self.name) - valid_types = (mstype.int32, mstype.int64) - validator.check_tensor_type_same({"indices": indices['dtype']}, valid_types, self.name) - validator.check_subclass("offset", offset['dtype'], mstype.int_, self.name) - params_shp = params['shape'] - out_shape = indices['shape'] + params_shp[1:] - out = {'shape': out_shape, - 'dtype': params['dtype'], - 'value': None} - return out - class Split(PrimitiveWithInfer): """ @@ -688,8 +643,10 @@ class Split(PrimitiveWithInfer): validator.check_int_range('axis value', self.axis, -dim, dim, Rel.INC_LEFT, self.name) validator.check_integer("output_num", self.output_num, 0, Rel.GT, self.name) output_valid_check = x_shape[self.axis] % self.output_num - validator.check_integer("the dimension which to split divides output_num", output_valid_check, 0, Rel.EQ, - self.name) + if output_valid_check != 0: + raise 
ValueError(f"x_shape[{self.axis}] {x_shape[self.axis]} must be divide exactly by" + f" output_num {self.output_num}") + x_shape[self.axis] = int(x_shape[self.axis] / self.output_num) out_shapes = [] out_dtypes = [] @@ -1031,7 +988,7 @@ class InvertPermutation(PrimitiveWithInfer): values can not be negative. Inputs: - - **input_x** (Union(tuple[int]) - The input tuple is constructed by multiple + - **input_x** (Union(tuple[int], list[int]) - The input is constructed by multiple integers, i.e., :math:`(y_1, y_2, ..., y_S)` representing the indices. The values must include 0. There can be no duplicate values or negative values. Only constant value is allowed. @@ -1059,6 +1016,12 @@ class InvertPermutation(PrimitiveWithInfer): validator.check_value_type("shape", x_shp, [tuple, list], self.name) if mstype.issubclass_(x['dtype'], mstype.tensor): raise ValueError(f'For \'{self.name}\' the input value must be non-Tensor.') + for shp in x_shp: + if shp != []: + x_rank = len(np.array(x_value, np.int64).shape) + raise ValueError(f'For \'{self.name}\' the rank of input must be 1, but got {x_rank}.') + for i, value in enumerate(x_value): + validator.check_value_type("input[%d]" % i, value, [int], self.name) z = [x_value[i] for i in range(len(x_value))] z.sort() @@ -1457,6 +1420,58 @@ class UnsortedSegmentMin(PrimitiveWithInfer): return out +class UnsortedSegmentProd(PrimitiveWithInfer): + """ + Computes the product along segments of a tensor. + + Inputs: + - **input_x** (Tensor) - The shape is :math:`(x_1, x_2, ..., x_R)`. + With float16, float32 or int32 data type. + - **segment_ids** (Tensor) - A `1-D` tensor whose shape is :math:`(x_1)`. Data type must be int32. + - **num_segments** (int) - The value spcifies the number of distinct `segment_ids`, + should be greater than 0. + + Outputs: + Tensor, Set the number of `num_segments` as `N`, the shape is :math:`(N, x_2, ..., x_R)`. 
+ + Examples: + >>> input_x = Tensor(np.array([[1, 2, 3], [4, 5, 6], [4, 2, 1]]).astype(np.float32)) + >>> segment_ids = Tensor(np.array([0, 1, 0]).astype(np.int32)) + >>> num_segments = 2 + >>> unsorted_segment_prod = P.UnsortedSegmentProd() + >>> unsorted_segment_prod(input_x, segment_ids, num_segments) + [[4., 4., 3.], [4., 5., 6.]] + """ + + @prim_attr_register + def __init__(self): + """init UnsortedSegmentProd""" + self.init_prim_io_names(inputs=['x', 'segment_ids', 'num_segments'], outputs=['y']) + + def __infer__(self, x, segment_ids, num_segments): + x_type = x['dtype'] + x_shape = x['shape'] + segment_ids_shape = segment_ids['shape'] + validator.check_subclass("input_x", x_type, mstype.tensor, self.name) + validator.check_value_type("x_shape", x_shape, [list], self.name) + valid_type = [mstype.float16, mstype.float32, mstype.int32] + validator.check_tensor_type_same({"x": x['dtype']}, valid_type, self.name) + validator.check_tensor_type_same({"segment_ids": segment_ids['dtype']}, [mstype.int32], self.name) + validator.check_integer("rank of segment_ids_shape", len(segment_ids_shape), 1, Rel.EQ, self.name) + validator.check(f'first shape of input_x', x_shape[0], + 'length of segments_id', segment_ids_shape[0], Rel.EQ, self.name) + num_segments_v = num_segments['value'] + validator.check_value_type('num_segments', num_segments_v, [int], self.name) + validator.check_integer("num_segments", num_segments_v, 0, Rel.GT, self.name) + segment_ids_shape_len = len(segment_ids_shape) + out_shape = [num_segments_v] + out_shape += x_shape[segment_ids_shape_len:] + out = {'shape': out_shape, + 'dtype': mstype.tensor_type(x_type.element_type()), + 'value': None} + return out + + class Concat(PrimitiveWithInfer): r""" Concat tensor in specified axis. @@ -1508,6 +1523,60 @@ class Concat(PrimitiveWithInfer): return out +class ParallelConcat(PrimitiveWithInfer): + r""" + Concat tensor in the first dimension. + + Concat input tensors along with the first dimension. 
+ + Note: + The input tensors are all required to have size 1 in the first dimension. + + Inputs: + - **values** (tuple, list) - Tuple or list of input tensors. The data type and shape of these + tensors must be same. + + Outputs: + Tensor, data type same as `values`. + + Examples: + >>> data1 = Tensor(np.array([[0, 1]]).astype(np.int32)) + >>> data2 = Tensor(np.array([[2, 1]]).astype(np.int32)) + >>> op = P.ParallelConcat() + >>> output = op((data1, data2)) + [[0, 1], [2, 1]] + """ + + @prim_attr_register + def __init__(self): + """init ParallelConcat""" + + def __infer__(self, values): + x_shp = values['shape'] + x_type = values['dtype'] + + validator.check_integer(f'x_shp length', len(x_shp), 1, Rel.GE, self.name) + + args = {f"x_type[{i}]": elem for i, elem in enumerate(x_type)} + validator.check_tensor_type_same(args, mstype.number_type + (mstype.bool_,), self.name) + + first_elem = x_shp[0] + for i, elem in enumerate(x_shp[1:]): + j = i + 1 + validator.check_integer(f'x_shp[{j}][0]', elem[0], 1, Rel.EQ, self.name) + validator.check(f"x_shp[0] shape", first_elem, f"x_shp[{j}] shape", elem, Rel.EQ, self.name) + + ret_shp = x_shp[0].copy() + ret_shp[0] = len(x_shp) + self.add_prim_attr('shape', ret_shp) + self.add_prim_attr('N', len(x_shp)) + + out = {'shape': ret_shp, + 'dtype': x_type[0], + 'value': None} + return out + + def _get_pack_shape(x_shape, x_type, axis, prim_name): """for pack output shape""" validator.check_value_type("shape", x_shape, [tuple, list], prim_name) @@ -3176,3 +3245,50 @@ class TransShape(PrimitiveWithInfer): return {'shape': shp, 'dtype': dtype, 'value': None} + + +class EmbeddingLookup(PrimitiveWithInfer): + """ + Returns a slice of input tensor based on the specified indices. + + This Primitive has the similar functionality as GatherV2 operating on `axis = 0`, but has one more inputs: + `offset`. + + Inputs: + - **input_params** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. 
+ The Tensor slice, instead of the entire Tensor. + - **input_indices** (Tensor) - The shape of tensor is :math:`(y_1, y_2, ..., y_S)`. + Specifies the indices of elements of the original Tensor. Values can be out of range of `input_params`, + and the exceeding part will be filled with 0 in the output. + - **offset** (int) - Specifies the offset value of this `input_params` slice. Thus the real indices + are equal to `input_indices` minus `offset`. + + Outputs: + Tensor, the shape of tensor is :math:`(z_1, z_2, ..., z_N)`. + + Examples: + >>> input_params = Tensor(np.array([[8, 9], [10, 11], [12, 13], [14, 15]]), mindspore.float32) + >>> input_indices = Tensor(np.array([[5, 2], [8, 5]]), mindspore.int32) + >>> offset = 4 + >>> out = P.EmbeddingLookup()(input_params, input_indices, offset) + [[[10, 11], [0 ,0]], [[0, 0], [10, 11]]] + """ + @prim_attr_register + def __init__(self): + """init index_select""" + self.__setattr_flag__ = True + self.init_prim_io_names(inputs=['params', 'indices', 'offset'], + outputs=['output']) + + def __infer__(self, params, indices, offset): + validator.check_subclass("params", params['dtype'], mstype.tensor, self.name) + validator.check_tensor_type_same({"indices": indices['dtype']}, mstype.int_type, self.name) + validator.check_subclass("offset", offset['dtype'], mstype.int_, self.name) + params_shp = params['shape'] + if len(params_shp) != 2: + raise ValueError("The dimension of 'params' in EmbeddingLookup must be 2, but got %d." 
% len(params_shp)) + out_shape = indices['shape'] + params_shp[1:] + out = {'shape': out_shape, + 'dtype': params['dtype'], + 'value': None} + return out diff --git a/mindspore/ops/operations/image_ops.py b/mindspore/ops/operations/image_ops.py index 1e366b5ea67..437cda3301e 100644 --- a/mindspore/ops/operations/image_ops.py +++ b/mindspore/ops/operations/image_ops.py @@ -117,8 +117,8 @@ class CropAndResize(PrimitiveWithInfer): validator.check("crop_height", crop_size_value[0], "minimum", 0, Rel.GT, self.name) validator.check("crop_width", crop_size_value[1], "minimum", 0, Rel.GT, self.name) # check crop_size element type - validator.check("crop_height dtype", crop_size_dtype[0], mstype.int32, self.name) - validator.check("crop_width dtype", crop_size_dtype[1], mstype.int32, self.name) + validator.check("crop_height dtype", crop_size_dtype[0], "expected", mstype.int32, Rel.EQ, self.name) + validator.check("crop_width dtype", crop_size_dtype[1], "expected", mstype.int32, Rel.EQ, self.name) num_boxes = boxes_shape[0] crop_height = crop_size_value[0] diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py index 9acd75d8e42..a9bdf07d288 100644 --- a/mindspore/ops/operations/nn_ops.py +++ b/mindspore/ops/operations/nn_ops.py @@ -234,7 +234,7 @@ class Softsign(PrimitiveWithInfer): \text{output} = \frac{\text{input_x}}{1 + \abs{\text{input_x}}}, Inputs: - - **input_x** (Tensor) - The input tensor whose data type should be float. + - **input_x** (Tensor) - The input tensor whose data type should be float16 or float32. Outputs: Tensor, with the same type and shape as the `input_x`. 
@@ -255,7 +255,7 @@ class Softsign(PrimitiveWithInfer): return input_x def infer_dtype(self, input_x): - validator.check_tensor_type_same({'input_x': input_x}, mstype.float_type, self.name) + validator.check_tensor_type_same({'input_x': input_x}, [mstype.float16, mstype.float32], self.name) return input_x @@ -1014,6 +1014,8 @@ class DepthwiseConv2dNative(PrimitiveWithInfer): def infer_dtype(self, x_dtype, w_dtype): args = {'x': x_dtype, 'w': w_dtype} validator.check_tensor_type_same(args, mstype.number_type, self.name) + if x_dtype.element_type() == mstype.int8: + return mstype.tensor_type(mstype.int32) return x_dtype @@ -1930,7 +1932,7 @@ class ApplyRMSProp(PrimitiveWithInfer): >>> decay = 0.0 >>> momentum = 1e-10 >>> epsilon = 0.001 - >>> result = apply_rms(input_x, mean_square, moment, grad, learning_rate, decay, momentum, epsilon) + >>> result = apply_rms(input_x, mean_square, moment, learning_rate, grad, decay, momentum, epsilon) (-2.9977674, 0.80999994, 1.9987665) """ @@ -2772,6 +2774,7 @@ class ROIAlign(PrimitiveWithInfer): feature map coordinates. Suppose the height of a RoI is `ori_h` in the raw image and `fea_h` in the input feature map, the `spatial_scale` should be `fea_h / ori_h`. sample_num (int): Number of sampling points. Default: 2. + roi_end_mode (int): Number must be 0 or 1. Default: 1. Inputs: - **features** (Tensor) - The input features, whose shape should be `(N, C, H, W)`. 
@@ -2788,22 +2791,25 @@ class ROIAlign(PrimitiveWithInfer): Examples: >>> input_tensor = Tensor(np.array([[[[1., 2.], [3., 4.]]]]), mindspore.float32) >>> rois = Tensor(np.array([[0, 0.2, 0.3, 0.2, 0.3]]), mindspore.float32) - >>> roi_align = P.ROIAlign(1, 1, 0.5, 2) + >>> roi_align = P.ROIAlign(2, 2, 0.5, 2) >>> output_tensor = roi_align(input_tensor, rois) >>> assert output_tensor == Tensor(np.array([[[[2.15]]]]), mindspore.float32) """ @prim_attr_register - def __init__(self, pooled_height, pooled_width, spatial_scale, sample_num=2): + def __init__(self, pooled_height, pooled_width, spatial_scale, sample_num=2, roi_end_mode=1): """init ROIAlign""" validator.check_value_type("pooled_height", pooled_height, [int], self.name) validator.check_value_type("pooled_width", pooled_width, [int], self.name) validator.check_value_type("spatial_scale", spatial_scale, [float], self.name) validator.check_value_type("sample_num", sample_num, [int], self.name) + validator.check_value_type("roi_end_mode", roi_end_mode, [int], self.name) + validator.check_int_range("roi_end_mode", roi_end_mode, 0, 1, Rel.INC_BOTH, self.name) self.pooled_height = pooled_height self.pooled_width = pooled_width self.spatial_scale = spatial_scale self.sample_num = sample_num + self.roi_end_mode = roi_end_mode def infer_shape(self, inputs_shape, rois_shape): return [rois_shape[0], inputs_shape[1], self.pooled_height, self.pooled_width] @@ -4803,19 +4809,19 @@ class CTCLoss(PrimitiveWithInfer): preprocess_collapse_repeated (bool): If True, repeated labels are collapsed prior to the CTC calculation. Default: False. ctc_merge_repeated (bool): If False, during CTC calculation, repeated non-blank labels will not be merged - and are interpreted as individual labels. This is a simplfied version if CTC. + and are interpreted as individual labels. This is a simplfied version of CTC. Default: True. ignore_longer_outputs_than_inputs (bool): If True, sequences with longer outputs than inputs will be ignored. 
Default: False. Inputs: - **inputs** (Tensor) - The input Tensor should be a `3-D` tensor whose shape is - :math:`(max_time, batch_size, num_class)`. `num_class` should be `num_labels + 1` classes, `num_labels` - indicates the number of actual labels. Blank labels are reserved. + :math:`(max_time, batch_size, num_classes)`. `num_classes` should be `num_labels + 1` classes, `num_labels` + indicates the number of actual labels. Blank labels are reserved. Default blank label is `num_classes - 1`. - **labels_indices** (Tensor) - The indices of labels. `labels_indices[i, :] == [b, t]` means `labels_values[i]` stores the id for `(batch b, time t)`. The type must be int64 and rank must be 2. - **labels_values** (Tensor) - A `1-D` input tensor. The values associated with the given batch and time. The - type must be int32. `labels_values[i]` must in the range of `[0, num_class)`. + type must be int32. `labels_values[i]` must in the range of `[0, num_classes)`. - **sequence_length** (Tensor) - A tensor containing sequence lengths with the shape of :math:`(batch_size)`. The type must be int32. Each value in the tensor should not greater than `max_time`. @@ -4849,6 +4855,7 @@ class CTCLoss(PrimitiveWithInfer): def infer_shape(self, inputs, labels_indices, labels_values, sequence_length): validator.check_integer("inputs rank", len(inputs), 3, Rel.EQ, self.name) validator.check_integer("labels_indices rank", len(labels_indices), 2, Rel.EQ, self.name) + validator.check_integer("labels_indices dim one", labels_indices[1], 2, Rel.EQ, self.name) validator.check_integer("labels_values rank", len(labels_values), 1, Rel.EQ, self.name) validator.check_integer("sequence_length rank", len(sequence_length), 1, Rel.EQ, self.name) validator.check('labels_indices size', labels_indices[0], 'labels_values size', @@ -5027,8 +5034,7 @@ class LRN(PrimitiveWithInfer): bias (float): An offset (usually positive to avoid dividing by 0). alpha (float): A scale factor, usually positive. 
beta (float): An exponent. - norm_region (str): Specify normalization region. Options: "ACROSS_CHANNELS", "WITHIN_CHANNEL". - Default: "ACROSS_CHANNELS". + norm_region (str): Specify normalization region. Options: "ACROSS_CHANNELS". Default: "ACROSS_CHANNELS". Inputs: - **x** (Tensor) - A 4D Tensor with float16 or float32 data type. @@ -5050,10 +5056,66 @@ class LRN(PrimitiveWithInfer): validator.check_value_type("alpha", alpha, [float], self.name) validator.check_value_type("beta", beta, [float], self.name) validator.check_value_type("norm_region", norm_region, [str], self.name) + validator.check_string('norm_region', norm_region, ['ACROSS_CHANNELS'], self.name) + validator.check_integer("depth_radius", depth_radius, 0, Rel.GE, self.name) def infer_dtype(self, x_dtype): validator.check_tensor_type_same({"x": x_dtype}, (mstype.float16, mstype.float32,), self.name) return x_dtype def infer_shape(self, x_shape): + validator.check_integer("x_shape", len(x_shape), 4, Rel.EQ, self.name) return x_shape + +class CTCLossV2(PrimitiveWithInfer): + r""" + Calculates the CTC(Connectionist Temporal Classification) loss. Also calculates the gradient. + Note: + - Cudnn Uses label value of for the `blank` + + Inputs: + - **inputs** (Tensor) - The input Tensor should be a `3-D` tensor whose shape is + :math:`(max_time, batch_size, num_class)`. `num_class` should be `num_labels + 1` classes, `num_labels` + indicates the number of actual labels. Blank labels are reserved. + - **labels** (Tensor) - The labels Tensor should be a `1-D` tensor whose shape is + :math:`(\sigma{label_lengths})` + or `2-D` tensor whose shape is + :math:`(max_time, max{label_lengths})` + The type must be int32. + - **input_lengths** (Tensor) - A `1-D` input tensor whose shape is + :math:`(batch_size,)`. The values should be batch. The type must be int32. + - **label_lengths** (Tensor) - A tensor containing sequence lengths with the shape of :math:`(batch_size)`. + The type must be int32. 
Each value in the tensor should not greater than `max_time`. + + Outputs: + - **loss** (Tensor) - A tensor containing log-probabilities, the shape is :math:`(batch_size)`. Has the same + type with `inputs`. + - **gradient** (Tensor) - The gradient of `loss`. Has the same type and shape with `inputs`. + + Examples: + >>> inputs = Tensor(np.random.random((2, 2, 3)), mindspore.float32) + >>> labels = Tensor(np.array([[0, 0], [1, 0]]), mindspore.int32) + >>> input_lengths = Tensor(np.array([3, 3, 3]), mindspore.int32) + >>> label_lengths = Tensor(np.array([3, 3, 3]), mindspore.int32) + >>> ctc_loss = P.CTCLossV2() + >>> output = ctc_loss(inputs, labels, input_lengths, label_lengths) + """ + @prim_attr_register + def __init__(self): + pass + + def infer_dtype(self, input_dtype, labels_dtype, input_lengths_dtype, label_lengths_dtype): + validator.check_tensor_type_same({"input": input_dtype}, (mstype.float32,), self.name) + validator.check_tensor_type_same({"labels": labels_dtype}, (mstype.int32,), self.name) + validator.check_tensor_type_same({"input_lengths": input_lengths_dtype}, (mstype.int32,), self.name) + validator.check_tensor_type_same({"target_lengths": label_lengths_dtype}, (mstype.int32,), self.name) + return mstype.float32, mstype.float32 + + def infer_shape(self, input_shape, labels_shape, input_lengths_shape, label_lengths_shape): + validator.check_integer("input shape", len(input_shape), 3, Rel.EQ, self.name) + validator.check_number_range("labels shape", len(labels_shape), 1, 2, Rel.INC_BOTH, self.name) + validator.check_integer("input lengths shape", len(input_lengths_shape), 1, Rel.EQ, self.name) + validator.check_integer("label lengths shape", len(label_lengths_shape), 1, Rel.EQ, self.name) + validator.check_integer("input[1]", input_shape[1], input_lengths_shape[0], Rel.EQ, self.name) + validator.check_integer("input[1]", input_shape[1], label_lengths_shape[0], Rel.EQ, self.name) + return (input_shape[1],), input_shape diff --git 
a/mindspore/ops/operations/other_ops.py b/mindspore/ops/operations/other_ops.py index b6b938d800b..a58403f8834 100644 --- a/mindspore/ops/operations/other_ops.py +++ b/mindspore/ops/operations/other_ops.py @@ -51,6 +51,7 @@ class Assign(PrimitiveWithInfer): ('variable', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T), ('value', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T) ) + @prim_attr_register def __init__(self): self.init_prim_io_names(inputs=['ref', 'value'], outputs=['output']) @@ -59,7 +60,9 @@ class Assign(PrimitiveWithInfer): return variable def infer_dtype(self, variable, value): - # Add a type validation later when we don't have to assign a value to RefKey. + if variable != mstype.type_refkey: + validator.check_tensor_type_same({"variable": variable}, mstype.number_type, self.name) + validator.check_scalar_or_tensor_type_same({"value": value}, mstype.number_type, self.name) return variable @@ -324,6 +327,7 @@ class Partial(Primitive): partial_func = functools.partial(func, *args[1:]) return partial_func + class Depend(Primitive): """ Depend is used for process side-effect operations. @@ -457,3 +461,83 @@ class ConfusionMatrix(PrimitiveWithInfer): args = {"labels": labels, "predictions": predictions} validator.check_tensor_type_same(args, (mstype.number_type), self.name) return labels + + +class PopulationCount(PrimitiveWithInfer): + r""" + Calculate population count. + + Inputs: + - **input** (Tensor) - The data type should be int16 or uint16. + + Outputs: + Tensor, with shape same as the input. 
+ + Examples: + >>> population_count = P.PopulationCount() + >>> x_input = Tensor([0, 1, 3], mindspore.int16) + >>> population_count(x_input) + """ + + @prim_attr_register + def __init__(self): + pass + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_dtype): + args = {"x": x_dtype} + validator.check_tensor_type_same(args, (mstype.int16, mstype.uint16,), self.name) + return mstype.tensor_type(mstype.uint8) + +class Push(PrimitiveWithInfer): + """ + Pushing the inputs of the corresponding optimizer to parameter server. + + Args: + optim_type (string): The optimizer type. Default: 'ApplyMomentum'. + only_shape_indices (list): The indices of input of which only shape + will be pushed to parameter server. Default: None. + + Inputs: + - **optim_inputs** (tuple) - The inputs for this kind of optimizer. + - **optim_input_shapes** (tuple) - The shapes of the inputs. + + Outputs: + Tensor, the key of the weight which needs to be updated. + """ + + @prim_attr_register + def __init__(self, optim_type='ApplyMomentum', only_shape_indices=None): + """init Push""" + self.init_prim_io_names(inputs=['optim_inputs', 'optim_input_shapes'], outputs=['key']) + + def infer_shape(self, inputs, shapes): + return [1] + + def infer_dtype(self, inputs, shapes): + return mstype.uint64 + +class Pull(PrimitiveWithInfer): + """ + Pulling weight from parameter server. + + Inputs: + - **key** (Tensor) - The key of the weight. + - **weight** (Tensor) - The weight to be updated. + + Outputs: + None. 
+ """ + + @prim_attr_register + def __init__(self): + """init Pull""" + self.init_prim_io_names(inputs=['key', 'weight'], outputs=['output']) + + def infer_shape(self, key_shape, weight_shape): + return [1] + + def infer_dtype(self, key_dtype, weight_dtype): + return mstype.float32 diff --git a/mindspore/ops/primitive.py b/mindspore/ops/primitive.py index 7ceb6877780..cb34e9ff24b 100644 --- a/mindspore/ops/primitive.py +++ b/mindspore/ops/primitive.py @@ -146,7 +146,7 @@ class Primitive(Primitive_): Check whether or not certain inputs should go into backend. Subclass in need should override this method. Args: - Same as arguments of current Primitive + *args(Primitive args): Same as arguments of current Primitive. Returns: A tuple of two elements, first element indicates whether or not we should filter out current arguments; @@ -237,12 +237,14 @@ class PrimitiveWithInfer(Primitive): """ Infer output shape based on input shape. - Args: - inputs (tuple(int)): dimensions of input tensors. - outputs (tuple(int)): dimensions of output tensors. - Note: The shape of scalar is an empty tuple. + + Args: + args (tuple(int)): shapes of input tensors. + + Return: + `tuple(int)`, shapes of output tensors. """ return None @@ -251,8 +253,10 @@ class PrimitiveWithInfer(Primitive): Infer output dtype based on input dtype. Args: - inputs (mstype): data type of inputs. - outputs (mstype): data type of outputs. + args (:class:`mindspore.dtype`): data type of inputs. + + Return: + :class:`mindspore.dtype`, data type of outputs. """ return None @@ -261,8 +265,10 @@ class PrimitiveWithInfer(Primitive): Infer output value based on input value at compile time. Args: - inputs (any): value of inputs. - outputs (any): value of outputs. + args (Any): value of inputs. + + Return: + Value of outputs. Return `None` for, cat not infer the value at compile time. 
""" return None diff --git a/mindspore/parallel/_utils.py b/mindspore/parallel/_utils.py index c5b4d57702d..68f070d4a5b 100644 --- a/mindspore/parallel/_utils.py +++ b/mindspore/parallel/_utils.py @@ -122,47 +122,6 @@ def _parameter_broadcast_check(parallel_mode, parameter_broadcast): "do not support parameter broadcast, parallel_mode: {0}, parameter_broadcast:{1}" .format(parallel_mode, parameter_broadcast)) - -PARAMETER_CLONED_INDEX = 0 - - -class _CloneInfo(): - """ - The clone info of parameter. - - Attributes: - be_cloned (bool): Whether the parameter is cloned. - cloned (bool): Whether the parameter clone from other parameter. - be_cloned_index (tuple): If the parameter is cloned, generate one index per clone. - cloned_index (int): If the parameter clone from other parameter, it has a unique index. - """ - def __init__(self): - self.be_cloned = False - self.cloned = False - self.be_cloned_index = [] - self.cloned_index = None - - -def _set_clone_info(clone_from, clone_to): - """ - Set the clone info. - - Args: - clone_from (_CloneInfo): The clone info of be_cloned parameter. - clone_to (_CloneInfo): The clone info of cloned parameter. 
- """ - global PARAMETER_CLONED_INDEX - clone_to.be_cloned = False - clone_to.cloned = True - clone_to.be_cloned_index = [] - clone_to.cloned_index = PARAMETER_CLONED_INDEX - - clone_from.be_cloned = True - clone_from.be_cloned_index.append(PARAMETER_CLONED_INDEX) - - PARAMETER_CLONED_INDEX = PARAMETER_CLONED_INDEX + 1 - - def _get_python_op(op_name, op_path, instance_name, arglist): """Get python operator.""" module = __import__(op_path, fromlist=["None"]) diff --git a/mindspore/train/callback/_loss_monitor.py b/mindspore/train/callback/_loss_monitor.py index 766777e8789..15a095c5cb1 100644 --- a/mindspore/train/callback/_loss_monitor.py +++ b/mindspore/train/callback/_loss_monitor.py @@ -14,7 +14,6 @@ # ============================================================================ """LossMonitor Callback class.""" -import time import numpy as np from mindspore.common.tensor import Tensor @@ -32,62 +31,32 @@ class LossMonitor(Callback): Args: per_print_times (int): Print loss every times. Default: 1. - lr_init (numpy array): train learning rate. Default: None. Raises: ValueError: If print_step is not int or less than zero. 
- - Examples: - >>> LossMonitor(100, lr_init=Tensor([0.05]*100).asnumpy()) """ - def __init__(self, per_print_times=1, lr_init=None): + def __init__(self, per_print_times=1): super(LossMonitor, self).__init__() if not isinstance(per_print_times, int) or per_print_times < 0: raise ValueError("print_step must be int and >= 0.") self._per_print_times = per_print_times - self.lr_init = lr_init - - def epoch_begin(self, run_context): - self.losses = [] - self.epoch_time = time.time() - - def epoch_end(self, run_context): - cb_params = run_context.original_args() - epoch_mseconds = (time.time() - self.epoch_time) * 1000 - per_step_mseconds = epoch_mseconds / cb_params.batch_num - print("Epoch time: {:5.3f}, per step time: {:5.3f}, " - "avg loss: {:5.3f}".format(epoch_mseconds, - per_step_mseconds, - np.mean(self.losses))) - print("*" * 60) - - def step_begin(self, run_context): - self.step_time = time.time() def step_end(self, run_context): cb_params = run_context.original_args() - step_mseconds = (time.time() - self.step_time) * 1000 - step_loss = cb_params.net_outputs + loss = cb_params.net_outputs - if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor): - step_loss = step_loss[0] - if isinstance(step_loss, Tensor): - step_loss = np.mean(step_loss.asnumpy()) + if isinstance(loss, (tuple, list)): + if isinstance(loss[0], Tensor) and isinstance(loss[0].asnumpy(), np.ndarray): + loss = loss[0] - self.losses.append(step_loss) - cur_step_in_epoch = int((cb_params.cur_step_num - 1) % cb_params.batch_num) + 1 + if isinstance(loss, Tensor) and isinstance(loss.asnumpy(), np.ndarray): + loss = np.mean(loss.asnumpy()) - if isinstance(step_loss, float) and (np.isnan(step_loss) or np.isinf(step_loss)): - raise ValueError("Epoch: [{:3d}/{:3d}], step: [{:5d}/{:5d}]. 
" - "Invalid loss, terminating training.".format( - cb_params.cur_epoch_num - 1, cb_params.epoch_num, - cur_step_in_epoch, cb_params.batch_num)) + cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1 + if isinstance(loss, float) and (np.isnan(loss) or np.isinf(loss)): + raise ValueError("epoch: {} step: {}. Invalid loss, terminating training.".format( + cb_params.cur_epoch_num, cur_step_in_epoch)) if self._per_print_times != 0 and cb_params.cur_step_num % self._per_print_times == 0: - print("Epoch: [{:3d}/{:3d}], step: [{:5d}/{:5d}], " - "loss: [{:5.4f}], avg los: [{:5.4f}], time: [{:5.4f}ms]".format( - cb_params.cur_epoch_num, cb_params.epoch_num, - cur_step_in_epoch, int(cb_params.batch_num), - step_loss, np.mean(self.losses), - step_mseconds), flush=True) + print("epoch: %s step: %s, loss is %s" % (cb_params.cur_epoch_num, cur_step_in_epoch, loss), flush=True) diff --git a/mindspore/train/callback/_summary_collector.py b/mindspore/train/callback/_summary_collector.py index 1550c3c55ce..ded0e9a650b 100644 --- a/mindspore/train/callback/_summary_collector.py +++ b/mindspore/train/callback/_summary_collector.py @@ -126,10 +126,12 @@ class SummaryCollector(Callback): >>> >>> # Only collect metric, custom lineage data and record data that collected by the summary operator, >>> # others are not collected - >>> specified = {'collect_metric':True, 'custom_lineage_data': {'version': 'resnet50_v1'}} + >>> specified = {'collect_metric': True} >>> summary_collector = SummaryCollector('./summary_dir', >>> collect_specified_data=specified, - >>> keep_default_action=False) + >>> keep_default_action=False, + >>> custom_lineage_data={'version': 'resnet50_v1'} + >>> ) >>> model.train(epoch, dataset, callbacks=summary_collector) """ diff --git a/mindspore/train/dataset_helper.py b/mindspore/train/dataset_helper.py index 14797e568b9..75e1deabc44 100644 --- a/mindspore/train/dataset_helper.py +++ b/mindspore/train/dataset_helper.py @@ -14,6 +14,7 @@ # 
============================================================================ """Dataset help for minddata dataset""" import math +import os from mindspore._checkparam import check_bool from .. import context @@ -60,7 +61,11 @@ class DatasetHelper: if context.get_context("device_target") == "Ascend": iterclass = _DatasetIterMSLoopSink elif context.get_context("device_target") == "GPU": - iterclass = _DatasetIterMS + ms_role = os.getenv("MS_ROLE") + if ms_role in ("MS_PSERVER", "MS_SCHED"): + iterclass = _DatasetIterPSLite + else: + iterclass = _DatasetIterMS elif context.get_context("device_target") == "CPU": raise RuntimeError("Currently dataset sink mode is not supported when the device target is CPU.") else: @@ -131,6 +136,9 @@ class _DatasetIterMSLoopSink(_DatasetIter): def __init__(self, dataset): super(_DatasetIterMSLoopSink, self).__init__(dataset) self.loop_count = self.get_loop_count(dataset) + ms_role = os.getenv("MS_ROLE") + if ms_role in ("MS_PSERVER", "MS_SCHED"): + self.loop_count = 1 # for self._parallel_mode equal to semi_auto_parallel or auto_parallel, and not using full_batch, # use a complete tensor to compile, and slice tensor to run. The batch dimension of tensors for # compile is device_number times the batch dimension of tensors for run. Now only support LoopSink. 
@@ -154,6 +162,18 @@ class _DatasetIterMS(_DatasetIter): self.op = GetNextSingleOp(self.dataset_types, self.dataset_shapes, queue_name) +class _DatasetIterPSLite(_DatasetIter): + """Iter for context (device_target=GPU) on MS_PSERVER or MS_SCHED""" + def __init__(self, dataset): + super(_DatasetIterPSLite, self).__init__(dataset) + self.loop_count = 1 + self.loop_size = 1 + self.op = None + def op(): + return _construct_tensor_list(self.dataset_types, self.dataset_shapes, batch_expand_num=1) + self.op = op + + class _DatasetIterGE(_DatasetIter): """Iter for ge""" def __init__(self, dataset): diff --git a/mindspore/train/model.py b/mindspore/train/model.py index 79bd6bc90ba..74fd668e820 100755 --- a/mindspore/train/model.py +++ b/mindspore/train/model.py @@ -15,6 +15,7 @@ """Model.""" from collections.abc import Iterable +import os import numpy as np from mindspore import log as logger @@ -350,6 +351,9 @@ class Model: cb_params.train_dataset = train_dataset cb_params.list_callback = self._transform_callbacks(callbacks) cb_params.train_dataset_element = None + ms_role = os.getenv("MS_ROLE") + if ms_role in ("MS_PSERVER", "MS_SCHED"): + epoch = 1 # build callback list with _CallbackManager(callbacks) as list_callback: diff --git a/mindspore/train/quant/quant.py b/mindspore/train/quant/quant.py index bc44ba22c27..b553373f105 100644 --- a/mindspore/train/quant/quant.py +++ b/mindspore/train/quant/quant.py @@ -33,8 +33,10 @@ from ...ops.operations import _inner_ops as inner from ...train import serialization from . 
import quant_utils -_ACTIVATION_MAP = {nn.ReLU: quant.ReLUQuant, - nn.ReLU6: quant.ReLU6Quant, +_ACTIVATION_MAP = {nn.ReLU: quant.ActQuant, + nn.ReLU6: quant.ActQuant, + nn.LeakyReLU: quant.ActQuant, + nn.Sigmoid: quant.ActQuant, nn.HSigmoid: quant.HSigmoidQuant, nn.HSwish: quant.HSwishQuant} @@ -112,7 +114,6 @@ class ConvertToQuantNetwork: def run(self): self.network.update_cell_prefix() network = self._convert_subcells2quant(self.network) - network = _AddFakeQuantInput(network) self.network.update_cell_type("quant") return network @@ -257,9 +258,9 @@ class ConvertToQuantNetwork: def _convert_activation(self, activation): act_class = activation.__class__ if act_class not in _ACTIVATION_MAP: - raise ValueError( - "Unsupported activation in auto quant: ", act_class) - return _ACTIVATION_MAP[act_class](num_bits=self.act_bits, + raise ValueError("Unsupported activation in auto quant: ", act_class) + return _ACTIVATION_MAP[act_class](activation=act_class, + num_bits=self.act_bits, quant_delay=self.act_qdelay, per_channel=self.act_channel, symmetric=self.act_symmetric, @@ -273,16 +274,20 @@ class ExportToQuantInferNetwork: Args: network (Cell): MindSpore network API `convert_quant_network`. inputs (Tensor): Input tensors of the `quantization aware training network`. + mean (int): Input data mean. Default: 127.5. + std_dev (int, float): Input data variance. Default: 127.5. Returns: Cell, GEIR backend Infer network. 
""" __quant_op_name__ = ["TensorAdd", "Sub", "Mul", "RealDiv"] - def __init__(self, - network, - *inputs): + def __init__(self, network, mean, std_dev, *inputs): network = validator.check_isinstance('network', network, (nn.Cell,)) + # quantize for inputs: q = f / scale + zero_point + # dequantize for outputs: f = (q - zero_point) * scale + self.input_scale = round(mean) + self.input_zero_point = 1 / std_dev self.data_type = mstype.int8 self.network = copy.deepcopy(network) self.all_parameters = {p.name: p for p in self.network.get_parameters()} @@ -313,11 +318,14 @@ class ExportToQuantInferNetwork: info = self.quant_info_table.get(w_minq_name, None) if info: fack_quant_a_in_op, minq_name = info - maxq = self.all_parameters[minq_name[:-4] + "maxq"] - minq = self.all_parameters[minq_name] - scale_a_in, zp_a_in = quant_utils.scale_zp_from_data(fack_quant_a_in_op, maxq, minq, np_type) + if minq_name == 'input': + scale_a_in, zp_a_in = self.input_scale, self.input_zero_point + else: + maxq = self.all_parameters[minq_name[:-4] + "maxq"] + minq = self.all_parameters[minq_name] + scale_a_in, zp_a_in = quant_utils.scale_zp_from_data(fack_quant_a_in_op, maxq, minq, np_type) else: - logger.warning(f"Do not find `fake_quant` from input with `fack_quant.minq` {w_minq_name}") + logger.warning(f"Do not find `fake_quant` from input with `fake_quant.minq` {w_minq_name}") return None # Build the `Quant` `Dequant` op. 
@@ -325,7 +333,7 @@ class ExportToQuantInferNetwork: quant_op = inner.AscendQuant(float(scale_a_in), float(zp_a_in)) sqrt_mode = False scale_deq = scale_a_out * scale_w - if scale_deq < 2 ** -14: + if (scale_deq < 2 ** -14).all(): scale_deq = np.sqrt(scale_deq) sqrt_mode = True dequant_op = inner.AscendDequant(sqrt_mode) @@ -393,7 +401,7 @@ class ExportToQuantInferNetwork: return network -def export(network, *inputs, file_name, file_format='GEIR'): +def export(network, *inputs, file_name, mean=127.5, std_dev=127.5, file_format='GEIR'): """ Exports MindSpore quantization predict model to deploy with GEIR. @@ -401,16 +409,27 @@ def export(network, *inputs, file_name, file_format='GEIR'): network (Cell): MindSpore network produced by `convert_quant_network`. inputs (Tensor): Inputs of the `quantization aware training network`. file_name (str): File name of model to export. + mean (int): Input data mean. Default: 127.5. + std_dev (int, float): Input data variance. Default: 127.5. file_format (str): MindSpore currently supports 'GEIR' format for exported quantization aware model. - GEIR: Graph Engine Intermediate Representation. An Intermediate representation format of Ascend model. 
""" + supported_device = ["Ascend"] supported_formats = ['GEIR'] + mean = validator.check_type("mean", mean, (int, float)) + std_dev = validator.check_type("std_dev", std_dev, (int, float)) + + if context.get_context('device_target') not in supported_device: + raise KeyError("Unsupported {} device target.".format(context.get_context('device_target'))) + if file_format not in supported_formats: raise ValueError('Illegal file format {}.'.format(file_format)) + network.set_train(False) + if file_format == 'GEIR': - exporter = ExportToQuantInferNetwork(network, *inputs) + exporter = ExportToQuantInferNetwork(network, mean, std_dev, *inputs) deploy_net = exporter.run() serialization.export(deploy_net, *inputs, file_name=file_name, file_format=file_format) diff --git a/mindspore/train/quant/quant_utils.py b/mindspore/train/quant/quant_utils.py index c4a8004012a..69505970fd8 100644 --- a/mindspore/train/quant/quant_utils.py +++ b/mindspore/train/quant/quant_utils.py @@ -45,7 +45,7 @@ def cal_quantization_params(input_min, raise ValueError("input min shape should equal to input max.") if len(input_min.shape) > 1: raise ValueError("input min and max shape should be one dim.") - if input_min > input_max: + if (input_min > input_max).all(): raise ValueError("input_min min should less than input max.") if (input_max == input_min).all(): # scale = 1.0, zp = 0.0 @@ -85,9 +85,7 @@ def cal_quantization_params(input_min, return scale, zp -def weight2int(data, - scale, - zero_point): +def weight2int(data, scale, zero_point): r""" Calculate int8/uint8 weight from fp32. the formula is defined as: @@ -103,12 +101,25 @@ def weight2int(data, weight (numpy.ndarray): The dimension of channel or 1. 
""" if scale.shape != zero_point.shape: - raise ValueError("scale and zero_point should have the same shape.") - if scale.shape[0] > 0: - scale = scale.reshape(1, -1) - zero_point = zero_point.reshape(1, -1) + raise ValueError("`scale` and `zero_point` should have the same shape.") + if scale.shape[0] < 0: + raise ValueError("`scale` and `zero_point` shape should greater than zero.") + if len(scale.shape) > 1: + # for perchannel + if scale.shape[0] == data.shape[0]: + # `Conv2d` or `Dense` op weight + shape_list = [-1] + [1] * len(data.shape[1:]) + scale = scale.reshape(shape_list) + zero_point = zero_point.reshape(shape_list) + elif scale.shape[0] == data.shape[1]: + # `DepthwiseConv2d` op weight + shape_list = [1, -1] + [1] * len(data.shape[2:]) + scale = scale.reshape(shape_list) + zero_point = zero_point.reshape(shape_list) + else: + raise ValueError("Unsupported weight shape({})".format(data.shape)) - return np.round((data/scale) + zero_point) + return np.round((data / scale) + zero_point) def scale_zp_from_fack_quant_cell(cell, data_type): @@ -183,9 +194,20 @@ def fold_batchnorm(weight, cell_quant): beta = cell_quant.beta.data.asnumpy() epsilon = cell_quant.eps sigma = np.sqrt(variance + epsilon) - gamma = gamma.reshape(-1, 1, 1, 1) - sigma = sigma.reshape(-1, 1, 1, 1) - mean = mean.reshape(-1, 1, 1, 1) - weight = weight * gamma / sigma + + if gamma.shape[0] == weight.shape[0]: + # `Conv2d` or `Dense` op weight + shape_list = [-1] + [1] * len(weight.shape[1:]) + _gamma = gamma.reshape(shape_list) + _sigma = sigma.reshape(shape_list) + elif gamma.shape[0] == weight.shape[1]: + # `DepthwiseConv2d` op weight + shape_list = [1, -1] + [1] * len(weight.shape[2:]) + _gamma = gamma.reshape(shape_list) + _sigma = sigma.reshape(shape_list) + else: + raise ValueError("Unsupported weight shape({})".format(weight.shape)) + + weight = weight * _gamma / _sigma bias = beta - gamma * mean / sigma return weight, bias diff --git a/mindspore/train/serialization.py 
b/mindspore/train/serialization.py index d74bee2706e..bc74986321c 100644 --- a/mindspore/train/serialization.py +++ b/mindspore/train/serialization.py @@ -302,7 +302,7 @@ def _save_graph(network, file_name): if graph_proto: with open(file_name, "wb") as f: f.write(graph_proto) - os.chmod(file_name, stat.S_IWUSR | stat.S_IRUSR) + os.chmod(file_name, stat.S_IRUSR) def _exec_save_checkpoint(train_network, ckpt_file_name, integrated_save=True): @@ -424,6 +424,7 @@ def export(net, *inputs, file_name, file_format='GEIR'): if is_training: net.set_train(mode=False) # export model + net.init_parameters_data() if file_format == 'GEIR': _executor.compile(net, *inputs, phase='export') _executor.export(net, file_name, file_format) @@ -462,19 +463,18 @@ def parse_print(print_file_name): List, element of list is Tensor. Raises: - ValueError: Print file is incorrect. + ValueError: The print file may be empty, please make sure enter the correct file name. """ - if not os.path.realpath(print_file_name): - raise ValueError("Please input the correct print file name.") + print_file_path = os.path.realpath(print_file_name) - if os.path.getsize(print_file_name) == 0: + if os.path.getsize(print_file_path) == 0: raise ValueError("The print file may be empty, please make sure enter the correct file name.") logger.info("Execute load print process.") print_list = Print() try: - with open(print_file_name, "rb") as f: + with open(print_file_path, "rb") as f: pb_content = f.read() print_list.ParseFromString(pb_content) except BaseException as e: diff --git a/model_zoo/README.md b/model_zoo/README.md index 2dde9856797..1e392445af2 100644 --- a/model_zoo/README.md +++ b/model_zoo/README.md @@ -134,43 +134,41 @@ In order to facilitate developers to enjoy the benefits of MindSpore framework a | Parameters | AlexNet | | -------------------------- | ------- | -| Published Year | | -| Paper | | -| Resource | | -| Features | | -| MindSpore Version | | -| Dataset | | -| Training Parameters | | -| 
Optimizer | | -| Loss Function | | -| Accuracy | | -| Speed | | -| Loss | | -| Params (M) | | -| Checkpoint for Fine tuning | | -| Model for inference | | -| Scripts | | +| Published Year | 2012 | +| Paper | [ImageNet Classification with Deep Convolutional Neural Networks](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-) | +| Resource | Ascend 910 | +| Features | support with Ascend, GPU | +| MindSpore Version | 0.5.0-beta | +| Dataset | CIFAR10 | +| Training Parameters | epoch=30, batch_size=32 | +| Optimizer | Momentum | +| Loss Function | SoftmaxCrossEntropyWithLogits | +| Accuracy | 88.23% | +| Speed | 1481fps | +| Loss | 0.108 | +| Params (M) | 61.10 | +| Checkpoint for Fine tuning | 445MB(.ckpt file) | +| Scripts | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/alexnet| #### [LeNet](#table-of-contents) | Parameters | LeNet | | -------------------------- | ----- | -| Published Year | | -| Paper | | -| Resource | | -| Features | | -| MindSpore Version | | -| Dataset | | -| Training Parameters | | -| Optimizer | | -| Loss Function | | -| Accuracy | | -| Speed | | -| Loss | | -| Params (M) | | -| Checkpoint for Fine tuning | | -| Model for inference | | -| Scripts | | +| Published Year | 1998 | +| Paper | [Gradient-Based Learning Applied to Document Recognition](https://ieeexplore.ieee.org/abstract/document/726791) | +| Resource | Ascend 910 | +| Features | support with Ascend, GPU, CPU | +| MindSpore Version | 0.5.0-beta | +| Dataset | MNIST | +| Training Parameters | epoch=10, batch_size=32 | +| Optimizer | Momentum | +| Loss Function | SoftmaxCrossEntropyWithLogits | +| Accuracy | 98.52% | +| Speed | 18680fps | +| Loss | 0.004 | +| Params (M) | 0.06 | +| Checkpoint for Fine tuning | 483KB(.ckpt file) | +| Scripts | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/lenet| ### Object Detection and Segmentation diff --git a/model_zoo/Transformer/train.py b/model_zoo/Transformer/train.py index 23c0eb78fd5..ffd6b8c7145 
100644 --- a/model_zoo/Transformer/train.py +++ b/model_zoo/Transformer/train.py @@ -147,10 +147,11 @@ def run_transformer_train(): callbacks = [TimeMonitor(dataset.get_dataset_size()), LossCallBack()] if args.enable_save_ckpt == "true": - ckpt_config = CheckpointConfig(save_checkpoint_steps=args.save_checkpoint_steps, - keep_checkpoint_max=args.save_checkpoint_num) - ckpoint_cb = ModelCheckpoint(prefix='transformer', directory=args.save_checkpoint_path, config=ckpt_config) - callbacks.append(ckpoint_cb) + if device_num == 1 or (device_num > 1 and rank_id == 0): + ckpt_config = CheckpointConfig(save_checkpoint_steps=args.save_checkpoint_steps, + keep_checkpoint_max=args.save_checkpoint_num) + ckpoint_cb = ModelCheckpoint(prefix='transformer', directory=args.save_checkpoint_path, config=ckpt_config) + callbacks.append(ckpoint_cb) if args.enable_lossscale == "true": scale_manager = DynamicLossScaleManager(init_loss_scale=cfg.init_loss_scale_value, diff --git a/model_zoo/alexnet/eval.py b/model_zoo/alexnet/eval.py index 41904516328..6a091aedd89 100644 --- a/model_zoo/alexnet/eval.py +++ b/model_zoo/alexnet/eval.py @@ -20,7 +20,7 @@ python eval.py --data_path /YourDataPath --ckpt_path Your.ckpt import argparse from src.config import alexnet_cfg as cfg -from src.dataset import create_dataset_mnist +from src.dataset import create_dataset_cifar10 from src.alexnet import AlexNet import mindspore.nn as nn from mindspore import context @@ -50,8 +50,8 @@ if __name__ == "__main__": print("============== Starting Testing ==============") param_dict = load_checkpoint(args.ckpt_path) load_param_into_net(network, param_dict) - ds_eval = create_dataset_mnist(args.data_path, - cfg.batch_size, - status="test") + ds_eval = create_dataset_cifar10(args.data_path, + cfg.batch_size, + status="test") acc = model.eval(ds_eval, dataset_sink_mode=args.dataset_sink_mode) print("============== {} ==============".format(acc)) diff --git a/model_zoo/alexnet/src/dataset.py 
b/model_zoo/alexnet/src/dataset.py index 6e9f310bed6..651c76d6e3e 100644 --- a/model_zoo/alexnet/src/dataset.py +++ b/model_zoo/alexnet/src/dataset.py @@ -23,7 +23,7 @@ from mindspore.common import dtype as mstype from .config import alexnet_cfg as cfg -def create_dataset_mnist(data_path, batch_size=32, repeat_size=1, status="train"): +def create_dataset_cifar10(data_path, batch_size=32, repeat_size=1, status="train"): """ create dataset for train or test """ diff --git a/model_zoo/alexnet/train.py b/model_zoo/alexnet/train.py index 184290c26c6..df038d62a23 100644 --- a/model_zoo/alexnet/train.py +++ b/model_zoo/alexnet/train.py @@ -20,7 +20,7 @@ python train.py --data_path /YourDataPath import argparse from src.config import alexnet_cfg as cfg -from src.dataset import create_dataset_mnist +from src.dataset import create_dataset_cifar10 from src.generator_lr import get_lr from src.alexnet import AlexNet import mindspore.nn as nn @@ -43,7 +43,7 @@ if __name__ == "__main__": context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target) - ds_train = create_dataset_mnist(args.data_path, cfg.batch_size, cfg.epoch_size) + ds_train = create_dataset_cifar10(args.data_path, cfg.batch_size, cfg.epoch_size) network = AlexNet(cfg.num_classes) loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") lr = Tensor(get_lr(0, cfg.learning_rate, cfg.epoch_size, ds_train.get_dataset_size())) diff --git a/model_zoo/bert/README.md b/model_zoo/bert/README.md index 3ed2bf67835..45928da4e3f 100644 --- a/model_zoo/bert/README.md +++ b/model_zoo/bert/README.md @@ -5,9 +5,9 @@ This example implements pre-training, fine-tuning and evaluation of [BERT-base]( ## Requirements - Install [MindSpore](https://www.mindspore.cn/install/en). - Download the zhwiki dataset for pre-training. Extract and clean text in the dataset with [WikiExtractor](https://github.com/attardi/wikiextractor). 
Convert the dataset to TFRecord format and move the files to a specified path. -- Download the CLUE/SQuAD v1.1 dataset for fine-tuning and evaluation. +- Download dataset for fine-tuning and evaluation such as CLUENER, TNEWS, SQuAD v1.1, etc. > Notes: - If you are running a fine-tuning or evaluation task, prepare the corresponding checkpoint file. + If you are running a fine-tuning or evaluation task, prepare a checkpoint from pre-train. ## Running the Example ### Pre-Training @@ -24,31 +24,15 @@ This example implements pre-training, fine-tuning and evaluation of [BERT-base]( sh scripts/run_distribute_pretrain.sh DEVICE_NUM EPOCH_SIZE DATA_DIR SCHEMA_DIR MINDSPORE_HCCL_CONFIG_PATH ``` -### Fine-Tuning -- Set options in `finetune_config.py`. Make sure the 'data_file', 'schema_file' and 'pre_training_file' are set to your own path. Set the 'pre_training_ckpt' to a saved checkpoint file generated after pre-training. +### Fine-Tuning and Evaluation +- Set bert network config and optimizer hyperparameters in `finetune_eval_config.py`. -- Run `finetune.py` for fine-tuning of BERT-base and BERT-NEZHA model. +- Set task related hyperparameters in scripts/run_XXX.sh. + +- Run `bash scripts/run_XXX.py` for fine-tuning of BERT-base and BERT-NEZHA model. ```bash - python finetune.py - ``` - -### Evaluation -- Set options in `evaluation_config.py`. Make sure the 'data_file', 'schema_file' and 'finetune_ckpt' are set to your own path. - -- NER: Run `evaluation.py` for evaluation of BERT-base and BERT-NEZHA model. - - ```bash - python evaluation.py - ``` -- SQuAD v1.1: Run `squadeval.py` and `SQuAD_postprocess.py` for evaluation of BERT-base and BERT-NEZHA model. 
- - ```bash - python squadeval.py - ``` - - ```bash - python SQuAD_postprocess.py + bash scripts/run_XXX.sh ``` ## Usage @@ -88,26 +72,56 @@ config.py: scale_window steps for once updatation of loss scale: N, default is 1000 optimizer optimizer used in the network: AdamWerigtDecayDynamicLR | Lamb | Momentum, default is "Lamb" -finetune_config.py: - task task type: SeqLabeling | Regression | Classification | COLA | SQUAD - num_labels number of labels to do classification - data_file dataset file to load: PATH, default is "/your/path/train.tfrecord" - schema_file dataset schema file to load: PATH, default is "/your/path/schema.json" - epoch_num repeat counts of training: N, default is 5 - ckpt_prefix prefix used to save checkpoint files: PREFIX, default is "bert" - ckpt_dir path to save checkpoint files: PATH, default is None - pre_training_ckpt checkpoint file to load: PATH, default is "/your/path/pre_training.ckpt" - use_crf whether to use crf for evaluation. use_crf takes effect only when task type is NER, default is False - optimizer optimizer used in fine-tune network: AdamWeigtDecayDynamicLR | Lamb | Momentum, default is "Lamb" +scripts/run_ner.sh: + device_target targeted device to run task: Ascend | GPU + do_train whether to run training on training set: true | false + do_eval whether to run eval on dev set: true | false + assessment_method assessment method to do evaluation: f1 | clue_benchmark + use_crf whether to use crf to calculate loss: true | false + device_id device id to run task + epoch_num total number of training epochs to perform + num_class number of classes to do labeling + vocab_file_path the vocabulary file that the BERT model was trained on + label2id_file_path label to id json file + save_finetune_checkpoint_path path to save generated finetuning checkpoint + load_pretrain_checkpoint_path initial checkpoint (usually from a pre-trained BERT model) + load_finetune_checkpoint_path give a finetuning checkpoint path if only do eval + 
train_data_file_path ner tfrecord for training. E.g., train.tfrecord + eval_data_file_path ner tfrecord for predictions if f1 is used to evaluate result, ner json for predictions if clue_benchmark is used to evaluate result + schema_file_path path to datafile schema file + +scripts/run_squad.sh: + device_target targeted device to run task: Ascend | GPU + do_train whether to run training on training set: true | false + do_eval whether to run eval on dev set: true | false + device_id device id to run task + epoch_num total number of training epochs to perform + num_class number of classes to classify, usually 2 for squad task + vocab_file_path the vocabulary file that the BERT model was trained on + eval_json_path path to squad dev json file + save_finetune_checkpoint_path path to save generated finetuning checkpoint + load_pretrain_checkpoint_path initial checkpoint (usually from a pre-trained BERT model) + load_finetune_checkpoint_path give a finetuning checkpoint path if only do eval + train_data_file_path squad tfrecord for training. E.g., train1.1.tfrecord + eval_data_file_path squad tfrecord for predictions. E.g., dev1.1.tfrecord + schema_file_path path to datafile schema file + +scripts/run_classifier.sh + device_target targeted device to run task: Ascend | GPU + do_train whether to run training on training set: true | false + do_eval whether to run eval on dev set: true | false + assessment_method assessment method to do evaluation: accuracy | f1 | mcc | spearman_correlation + device_id device id to run task + epoch_num total number of training epochs to perform + num_class number of classes to do labeling + save_finetune_checkpoint_path path to save generated finetuning checkpoint + load_pretrain_checkpoint_path initial checkpoint (usually from a pre-trained BERT model) + load_finetune_checkpoint_path give a finetuning checkpoint path if only do eval + train_data_file_path tfrecord for training. 
E.g., train.tfrecord + eval_data_file_path tfrecord for predictions. E.g., dev.tfrecord + schema_file_path path to datafile schema file + -evaluation_config.py: - task task type: SeqLabeling | Regression | Classification | COLA - num_labels number of labels to do classsification - data_file dataset file to load: PATH, default is "/your/path/evaluation.tfrecord" - schema_file dataset schema file to load: PATH, default is "/your/path/schema.json" - finetune_ckpt checkpoint file to load: PATH, default is "/your/path/your.ckpt" - use_crf whether to use crf for evaluation. use_crf takes effect only when task type is NER, default is False - clue_benchmark whether to use clue benchmark. clue_benchmark takes effect only when task type is NER, default is False ``` ### Parameters: @@ -115,7 +129,7 @@ evaluation_config.py: Parameters for dataset and network (Pre-Training/Fine-Tuning/Evaluation): batch_size batch size of input dataset: N, default is 16 seq_length length of input sequence: N, default is 128 - vocab_size size of each embedding vector: N, default is 21136 + vocab_size size of each embedding vector: N, must be consistant with the dataset you use. Default is 21136 hidden_size size of bert encoder layers: N, default is 768 num_hidden_layers number of hidden layers: N, default is 12 num_attention_heads number of attention heads: N, default is 12 diff --git a/model_zoo/bert/evaluation.py b/model_zoo/bert/evaluation.py deleted file mode 100644 index 4e8b2a3aea8..00000000000 --- a/model_zoo/bert/evaluation.py +++ /dev/null @@ -1,272 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -""" -Bert evaluation script. -""" - -import os -import argparse -import math -import numpy as np -import mindspore.common.dtype as mstype -from mindspore import context -from mindspore import log as logger -from mindspore.common.tensor import Tensor -import mindspore.dataset as de -import mindspore.dataset.transforms.c_transforms as C -from mindspore.train.model import Model -from mindspore.train.serialization import load_checkpoint, load_param_into_net -from src.evaluation_config import cfg, bert_net_cfg -from src.utils import BertNER, BertCLS, BertReg -from src.CRF import postprocess -from src.cluener_evaluation import submit -from src.finetune_config import tag_to_index - - -class Accuracy(): - """ - calculate accuracy - """ - def __init__(self): - self.acc_num = 0 - self.total_num = 0 - - def update(self, logits, labels): - """ - Update accuracy - """ - labels = labels.asnumpy() - labels = np.reshape(labels, -1) - logits = logits.asnumpy() - logit_id = np.argmax(logits, axis=-1) - self.acc_num += np.sum(labels == logit_id) - self.total_num += len(labels) - print("=========================accuracy is ", self.acc_num / self.total_num) - - -class F1(): - """ - calculate F1 score - """ - def __init__(self): - self.TP = 0 - self.FP = 0 - self.FN = 0 - - def update(self, logits, labels): - """ - update F1 score - """ - labels = labels.asnumpy() - labels = np.reshape(labels, -1) - if cfg.use_crf: - backpointers, best_tag_id = logits - best_path = postprocess(backpointers, 
best_tag_id) - logit_id = [] - for ele in best_path: - logit_id.extend(ele) - else: - logits = logits.asnumpy() - logit_id = np.argmax(logits, axis=-1) - logit_id = np.reshape(logit_id, -1) - pos_eva = np.isin(logit_id, [i for i in range(1, cfg.num_labels)]) - pos_label = np.isin(labels, [i for i in range(1, cfg.num_labels)]) - self.TP += np.sum(pos_eva&pos_label) - self.FP += np.sum(pos_eva&(~pos_label)) - self.FN += np.sum((~pos_eva)&pos_label) - - -class MCC(): - """ - Calculate Matthews Correlation Coefficient. - """ - def __init__(self): - self.TP = 0 - self.FP = 0 - self.FN = 0 - self.TN = 0 - - def update(self, logits, labels): - """ - Update MCC score - """ - labels = labels.asnumpy() - labels = np.reshape(labels, -1) - labels = labels.astype(np.bool) - logits = logits.asnumpy() - logit_id = np.argmax(logits, axis=-1) - logit_id = np.reshape(logit_id, -1) - logit_id = logit_id.astype(np.bool) - ornot = logit_id ^ labels - - self.TP += (~ornot & labels).sum() - self.FP += (ornot & ~labels).sum() - self.FN += (ornot & labels).sum() - self.TN += (~ornot & ~labels).sum() - - -class Spearman_Correlation(): - """ - calculate Spearman Correlation coefficient - """ - def __init__(self): - self.label = [] - self.logit = [] - - def update(self, logits, labels): - """ - Update Spearman Correlation - """ - labels = labels.asnumpy() - labels = np.reshape(labels, -1) - logits = logits.asnumpy() - logits = np.reshape(logits, -1) - self.label.append(labels) - self.logit.append(logits) - - def cal(self): - """ - Calculate Spearman Correlation - """ - label = np.concatenate(self.label) - logit = np.concatenate(self.logit) - sort_label = label.argsort()[::-1] - sort_logit = logit.argsort()[::-1] - n = len(label) - d_acc = 0 - for i in range(n): - d = np.where(sort_label == i)[0] - np.where(sort_logit == i)[0] - d_acc += d**2 - ps = 1 - 6*d_acc/n/(n**2-1) - return ps - - -def get_dataset(batch_size=1, repeat_count=1, distribute_file=''): - """ - get dataset - """ - _ = 
distribute_file - - ds = de.TFRecordDataset([cfg.data_file], cfg.schema_file, columns_list=["input_ids", "input_mask", - "segment_ids", "label_ids"]) - type_cast_op = C.TypeCast(mstype.int32) - ds = ds.map(input_columns="segment_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_mask", operations=type_cast_op) - ds = ds.map(input_columns="input_ids", operations=type_cast_op) - if cfg.task == "Regression": - type_cast_op_float = C.TypeCast(mstype.float32) - ds = ds.map(input_columns="label_ids", operations=type_cast_op_float) - else: - ds = ds.map(input_columns="label_ids", operations=type_cast_op) - ds = ds.repeat(repeat_count) - - # apply shuffle operation - buffer_size = 960 - ds = ds.shuffle(buffer_size=buffer_size) - - # apply batch operations - ds = ds.batch(batch_size, drop_remainder=True) - return ds - - -def bert_predict(Evaluation): - """ - prediction function - """ - target = args_opt.device_target - if target == "Ascend": - devid = int(os.getenv('DEVICE_ID')) - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=devid) - elif target == "GPU": - context.set_context(mode=context.GRAPH_MODE, device_target="GPU") - if bert_net_cfg.compute_type != mstype.float32: - logger.warning('GPU only support fp32 temporarily, run with fp32.') - bert_net_cfg.compute_type = mstype.float32 - else: - raise Exception("Target error, GPU or Ascend is supported.") - dataset = get_dataset(bert_net_cfg.batch_size, 1) - if cfg.use_crf: - net_for_pretraining = Evaluation(bert_net_cfg, False, num_labels=len(tag_to_index), use_crf=True, - tag_to_index=tag_to_index, dropout_prob=0.0) - else: - net_for_pretraining = Evaluation(bert_net_cfg, False, num_labels) - net_for_pretraining.set_train(False) - param_dict = load_checkpoint(cfg.finetune_ckpt) - load_param_into_net(net_for_pretraining, param_dict) - model = Model(net_for_pretraining) - return model, dataset - -def test_eval(): - """ - evaluation function - """ - if cfg.task == "SeqLabeling": 
- task_type = BertNER - elif cfg.task == "Regression": - task_type = BertReg - elif cfg.task == "Classification": - task_type = BertCLS - elif cfg.task == "COLA": - task_type = BertCLS - else: - raise ValueError("Task not supported.") - model, dataset = bert_predict(task_type) - - if cfg.clue_benchmark: - submit(model, cfg.data_file, bert_net_cfg.seq_length) - else: - if cfg.task == "SeqLabeling": - callback = F1() - elif cfg.task == "COLA": - callback = MCC() - elif cfg.task == "Regression": - callback = Spearman_Correlation() - else: - callback = Accuracy() - - columns_list = ["input_ids", "input_mask", "segment_ids", "label_ids"] - for data in dataset.create_dict_iterator(): - input_data = [] - for i in columns_list: - input_data.append(Tensor(data[i])) - input_ids, input_mask, token_type_id, label_ids = input_data - logits = model.predict(input_ids, input_mask, token_type_id, label_ids) - callback.update(logits, label_ids) - print("==============================================================") - if cfg.task == "SeqLabeling": - print("Precision {:.6f} ".format(callback.TP / (callback.TP + callback.FP))) - print("Recall {:.6f} ".format(callback.TP / (callback.TP + callback.FN))) - print("F1 {:.6f} ".format(2*callback.TP / (2*callback.TP + callback.FP + callback.FN))) - elif cfg.task == "COLA": - TP = callback.TP - TN = callback.TN - FP = callback.FP - FN = callback.FN - mcc = (TP*TN-FP*FN)/math.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)) - print("MCC: {:.6f}".format(mcc)) - elif cfg.task == "Regression": - print("Spearman Correlation is {:.6f}".format(callback.cal()[0])) - else: - print("acc_num {} , total_num {}, accuracy {:.6f}".format(callback.acc_num, callback.total_num, - callback.acc_num / callback.total_num)) - print("==============================================================") - -parser = argparse.ArgumentParser(description='Bert eval') -parser.add_argument('--device_target', type=str, default='Ascend', help='Device target') -args_opt = 
parser.parse_args() -if __name__ == "__main__": - num_labels = cfg.num_labels - test_eval() diff --git a/model_zoo/bert/finetune.py b/model_zoo/bert/finetune.py deleted file mode 100644 index eb1880b9cc8..00000000000 --- a/model_zoo/bert/finetune.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -""" -Bert finetune script. -""" - -import os -import argparse -from src.utils import BertFinetuneCell, BertCLS, BertNER, BertSquad, BertSquadCell, BertReg -from src.finetune_config import cfg, bert_net_cfg, tag_to_index -import mindspore.common.dtype as mstype -from mindspore import context -from mindspore import log as logger -import mindspore.dataset as de -import mindspore.dataset.transforms.c_transforms as C -from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell -from mindspore.nn.optim import AdamWeightDecayDynamicLR, Lamb, Momentum -from mindspore.train.model import Model -from mindspore.train.callback import Callback -from mindspore.train.callback import CheckpointConfig, ModelCheckpoint -from mindspore.train.serialization import load_checkpoint, load_param_into_net - -class LossCallBack(Callback): - """ - Monitor the loss in training. - If the loss is NAN or INF, terminate training. - Note: - If per_print_times is 0, do not print loss. - Args: - per_print_times (int): Print loss every times. 
Default: 1. - """ - def __init__(self, per_print_times=1): - super(LossCallBack, self).__init__() - if not isinstance(per_print_times, int) or per_print_times < 0: - raise ValueError("print_step must be in and >= 0.") - self._per_print_times = per_print_times - - def step_end(self, run_context): - cb_params = run_context.original_args() - with open("./loss.log", "a+") as f: - f.write("epoch: {}, step: {}, outputs are {}".format(cb_params.cur_epoch_num, cb_params.cur_step_num, - str(cb_params.net_outputs))) - f.write("\n") - -def get_dataset(batch_size=1, repeat_count=1, distribute_file=''): - """ - get dataset - """ - ds = de.TFRecordDataset([cfg.data_file], cfg.schema_file, columns_list=["input_ids", "input_mask", - "segment_ids", "label_ids"]) - type_cast_op = C.TypeCast(mstype.int32) - ds = ds.map(input_columns="segment_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_mask", operations=type_cast_op) - ds = ds.map(input_columns="input_ids", operations=type_cast_op) - if cfg.task == "Regression": - type_cast_op_float = C.TypeCast(mstype.float32) - ds = ds.map(input_columns="label_ids", operations=type_cast_op_float) - else: - ds = ds.map(input_columns="label_ids", operations=type_cast_op) - ds = ds.repeat(repeat_count) - - # apply shuffle operation - buffer_size = 960 - ds = ds.shuffle(buffer_size=buffer_size) - - # apply batch operations - ds = ds.batch(batch_size, drop_remainder=True) - return ds - -def get_squad_dataset(batch_size=1, repeat_count=1, distribute_file=''): - """ - get SQuAD dataset - """ - ds = de.TFRecordDataset([cfg.data_file], cfg.schema_file, columns_list=["input_ids", "input_mask", "segment_ids", - "start_positions", "end_positions", - "unique_ids", "is_impossible"]) - type_cast_op = C.TypeCast(mstype.int32) - ds = ds.map(input_columns="segment_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_mask", operations=type_cast_op) - ds = 
ds.map(input_columns="start_positions", operations=type_cast_op) - ds = ds.map(input_columns="end_positions", operations=type_cast_op) - ds = ds.repeat(repeat_count) - - buffer_size = 960 - ds = ds.shuffle(buffer_size=buffer_size) - ds = ds.batch(batch_size, drop_remainder=True) - return ds - -def test_train(): - """ - finetune function - """ - target = args_opt.device_target - if target == "Ascend": - devid = int(os.getenv('DEVICE_ID')) - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=devid) - elif target == "GPU": - context.set_context(mode=context.GRAPH_MODE, device_target="GPU") - if bert_net_cfg.compute_type != mstype.float32: - logger.warning('GPU only support fp32 temporarily, run with fp32.') - bert_net_cfg.compute_type = mstype.float32 - else: - raise Exception("Target error, GPU or Ascend is supported.") - #BertCLSTrain for classification - #BertNERTrain for sequence labeling - if cfg.task == 'SeqLabeling': - if cfg.use_crf: - netwithloss = BertNER(bert_net_cfg, True, num_labels=len(tag_to_index), use_crf=True, - tag_to_index=tag_to_index, dropout_prob=0.1) - else: - netwithloss = BertNER(bert_net_cfg, True, num_labels=cfg.num_labels, dropout_prob=0.1) - elif cfg.task == 'SQUAD': - netwithloss = BertSquad(bert_net_cfg, True, 2, dropout_prob=0.1) - elif cfg.task == 'Regression': - netwithloss = BertReg(bert_net_cfg, True, num_labels=cfg.num_labels, dropout_prob=0.1) - elif cfg.task == 'Classification': - netwithloss = BertCLS(bert_net_cfg, True, num_labels=cfg.num_labels, dropout_prob=0.1) - else: - raise Exception("Target error, GPU or Ascend is supported.") - if cfg.task == 'SQUAD': - dataset = get_squad_dataset(bert_net_cfg.batch_size, cfg.epoch_num) - else: - dataset = get_dataset(bert_net_cfg.batch_size, cfg.epoch_num) - # optimizer - steps_per_epoch = dataset.get_dataset_size() - if cfg.optimizer == 'AdamWeightDecayDynamicLR': - optimizer = AdamWeightDecayDynamicLR(netwithloss.trainable_params(), - 
decay_steps=steps_per_epoch * cfg.epoch_num, - learning_rate=cfg.AdamWeightDecayDynamicLR.learning_rate, - end_learning_rate=cfg.AdamWeightDecayDynamicLR.end_learning_rate, - power=cfg.AdamWeightDecayDynamicLR.power, - warmup_steps=int(steps_per_epoch * cfg.epoch_num * 0.1), - weight_decay=cfg.AdamWeightDecayDynamicLR.weight_decay, - eps=cfg.AdamWeightDecayDynamicLR.eps) - elif cfg.optimizer == 'Lamb': - optimizer = Lamb(netwithloss.trainable_params(), decay_steps=steps_per_epoch * cfg.epoch_num, - start_learning_rate=cfg.Lamb.start_learning_rate, end_learning_rate=cfg.Lamb.end_learning_rate, - power=cfg.Lamb.power, weight_decay=cfg.Lamb.weight_decay, - warmup_steps=int(steps_per_epoch * cfg.epoch_num * 0.1), decay_filter=cfg.Lamb.decay_filter) - elif cfg.optimizer == 'Momentum': - optimizer = Momentum(netwithloss.trainable_params(), learning_rate=cfg.Momentum.learning_rate, - momentum=cfg.Momentum.momentum) - else: - raise Exception("Optimizer not supported.") - # load checkpoint into network - ckpt_config = CheckpointConfig(save_checkpoint_steps=steps_per_epoch, keep_checkpoint_max=1) - ckpoint_cb = ModelCheckpoint(prefix=cfg.ckpt_prefix, directory=cfg.ckpt_dir, config=ckpt_config) - param_dict = load_checkpoint(cfg.pre_training_ckpt) - load_param_into_net(netwithloss, param_dict) - - update_cell = DynamicLossScaleUpdateCell(loss_scale_value=2**32, scale_factor=2, scale_window=1000) - if cfg.task == 'SQUAD': - netwithgrads = BertSquadCell(netwithloss, optimizer=optimizer, scale_update_cell=update_cell) - else: - netwithgrads = BertFinetuneCell(netwithloss, optimizer=optimizer, scale_update_cell=update_cell) - model = Model(netwithgrads) - model.train(cfg.epoch_num, dataset, callbacks=[LossCallBack(), ckpoint_cb]) - - -parser = argparse.ArgumentParser(description='Bert finetune') -parser.add_argument('--device_target', type=str, default='Ascend', help='Device target') -args_opt = parser.parse_args() -if __name__ == "__main__": - test_train() diff --git 
a/model_zoo/bert/run_classifier.py b/model_zoo/bert/run_classifier.py new file mode 100644 index 00000000000..4b2801f87c9 --- /dev/null +++ b/model_zoo/bert/run_classifier.py @@ -0,0 +1,201 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +''' +Bert finetune and evaluation script. +''' + +import os +import argparse +from src.bert_for_finetune import BertFinetuneCell, BertCLS +from src.finetune_eval_config import optimizer_cfg, bert_net_cfg +from src.dataset import create_classification_dataset +from src.assessment_method import Accuracy, F1, MCC, Spearman_Correlation +from src.utils import make_directory, LossCallBack, LoadNewestCkpt +import mindspore.common.dtype as mstype +from mindspore import context +from mindspore import log as logger +from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell +from mindspore.nn.optim import AdamWeightDecayDynamicLR, Lamb, Momentum +from mindspore.common.tensor import Tensor +from mindspore.train.model import Model +from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, TimeMonitor +from mindspore.train.serialization import load_checkpoint, load_param_into_net + +_cur_dir = os.getcwd() + +def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path=""): + """ do train """ + if load_checkpoint_path == "": + raise ValueError("Pretrain model missed, 
finetune task must load pretrain model!") + steps_per_epoch = dataset.get_dataset_size() + epoch_num = dataset.get_repeat_count() + # optimizer + if optimizer_cfg.optimizer == 'AdamWeightDecayDynamicLR': + optimizer = AdamWeightDecayDynamicLR(network.trainable_params(), + decay_steps=steps_per_epoch * epoch_num, + learning_rate=optimizer_cfg.AdamWeightDecayDynamicLR.learning_rate, + end_learning_rate=optimizer_cfg.AdamWeightDecayDynamicLR.end_learning_rate, + power=optimizer_cfg.AdamWeightDecayDynamicLR.power, + warmup_steps=int(steps_per_epoch * epoch_num * 0.1), + weight_decay=optimizer_cfg.AdamWeightDecayDynamicLR.weight_decay, + eps=optimizer_cfg.AdamWeightDecayDynamicLR.eps) + elif optimizer_cfg.optimizer == 'Lamb': + optimizer = Lamb(network.trainable_params(), decay_steps=steps_per_epoch * epoch_num, + start_learning_rate=optimizer_cfg.Lamb.start_learning_rate, + end_learning_rate=optimizer_cfg.Lamb.end_learning_rate, + power=optimizer_cfg.Lamb.power, weight_decay=optimizer_cfg.Lamb.weight_decay, + warmup_steps=int(steps_per_epoch * epoch_num * 0.1), + decay_filter=optimizer_cfg.Lamb.decay_filter) + elif optimizer_cfg.optimizer == 'Momentum': + optimizer = Momentum(network.trainable_params(), learning_rate=optimizer_cfg.Momentum.learning_rate, + momentum=optimizer_cfg.Momentum.momentum) + else: + raise Exception("Optimizer not supported. 
support: [AdamWeightDecayDynamicLR, Lamb, Momentum]") + + # load checkpoint into network + ckpt_config = CheckpointConfig(save_checkpoint_steps=steps_per_epoch, keep_checkpoint_max=1) + ckpoint_cb = ModelCheckpoint(prefix="classifier", directory=save_checkpoint_path, config=ckpt_config) + param_dict = load_checkpoint(load_checkpoint_path) + load_param_into_net(network, param_dict) + + update_cell = DynamicLossScaleUpdateCell(loss_scale_value=2**32, scale_factor=2, scale_window=1000) + netwithgrads = BertFinetuneCell(network, optimizer=optimizer, scale_update_cell=update_cell) + model = Model(netwithgrads) + callbacks = [TimeMonitor(dataset.get_dataset_size()), LossCallBack(), ckpoint_cb] + model.train(epoch_num, dataset, callbacks=callbacks) + +def eval_result_print(assessment_method="accuracy", callback=None): + """ print eval result """ + if assessment_method == "accuracy": + print("acc_num {} , total_num {}, accuracy {:.6f}".format(callback.acc_num, callback.total_num, + callback.acc_num / callback.total_num)) + elif assessment_method == "f1": + print("Precision {:.6f} ".format(callback.TP / (callback.TP + callback.FP))) + print("Recall {:.6f} ".format(callback.TP / (callback.TP + callback.FN))) + print("F1 {:.6f} ".format(2 * callback.TP / (2 * callback.TP + callback.FP + callback.FN))) + elif assessment_method == "mcc": + print("MCC {:.6f} ".format(callback.cal())) + elif assessment_method == "spearman_correlation": + print("Spearman Correlation is {:.6f} ".format(callback.cal()[0])) + else: + raise ValueError("Assessment method not supported, support: [accuracy, f1, mcc, spearman_correlation]") + +def do_eval(dataset=None, network=None, num_class=2, assessment_method="accuracy", load_checkpoint_path=""): + """ do eval """ + if load_checkpoint_path == "": + raise ValueError("Finetune model missed, evaluation task must load finetune model!") + net_for_pretraining = network(bert_net_cfg, False, num_class) + net_for_pretraining.set_train(False) + param_dict = 
load_checkpoint(load_checkpoint_path) + load_param_into_net(net_for_pretraining, param_dict) + model = Model(net_for_pretraining) + + if assessment_method == "accuracy": + callback = Accuracy() + elif assessment_method == "f1": + callback = F1(False, num_class) + elif assessment_method == "mcc": + callback = MCC() + elif assessment_method == "spearman_correlation": + callback = Spearman_Correlation() + else: + raise ValueError("Assessment method not supported, support: [accuracy, f1, mcc, spearman_correlation]") + + columns_list = ["input_ids", "input_mask", "segment_ids", "label_ids"] + for data in dataset.create_dict_iterator(): + input_data = [] + for i in columns_list: + input_data.append(Tensor(data[i])) + input_ids, input_mask, token_type_id, label_ids = input_data + logits = model.predict(input_ids, input_mask, token_type_id, label_ids) + callback.update(logits, label_ids) + print("==============================================================") + eval_result_print(assessment_method, callback) + print("==============================================================") + +def run_classifier(): + """run classifier task""" + parser = argparse.ArgumentParser(description="run classifier") + parser.add_argument("--device_target", type=str, default="Ascend", help="Device type, default is Ascend") + parser.add_argument("--assessment_method", type=str, default="accuracy", help="assessment_method include: " + "[MCC, Spearman_correlation, " + "Accuracy], default is accuracy") + parser.add_argument("--do_train", type=str, default="false", help="Eable train, default is false") + parser.add_argument("--do_eval", type=str, default="false", help="Eable eval, default is false") + parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") + parser.add_argument("--epoch_num", type=int, default="1", help="Epoch number, default is 1.") + parser.add_argument("--num_class", type=int, default="2", help="The number of class, default is 2.") + 
parser.add_argument("--save_finetune_checkpoint_path", type=str, default="", help="Save checkpoint path") + parser.add_argument("--load_pretrain_checkpoint_path", type=str, default="", help="Load checkpoint file path") + parser.add_argument("--load_finetune_checkpoint_path", type=str, default="", help="Load checkpoint file path") + parser.add_argument("--train_data_file_path", type=str, default="", + help="Data path, it is better to use absolute path") + parser.add_argument("--eval_data_file_path", type=str, default="", + help="Data path, it is better to use absolute path") + parser.add_argument("--schema_file_path", type=str, default="", + help="Schema path, it is better to use absolute path") + args_opt = parser.parse_args() + epoch_num = args_opt.epoch_num + assessment_method = args_opt.assessment_method.lower() + load_pretrain_checkpoint_path = args_opt.load_pretrain_checkpoint_path + save_finetune_checkpoint_path = args_opt.save_finetune_checkpoint_path + load_finetune_checkpoint_path = args_opt.load_finetune_checkpoint_path + + if args_opt.do_train.lower() == "false" and args_opt.do_eval.lower() == "false": + raise ValueError("At least one of 'do_train' or 'do_eval' must be true") + if args_opt.do_train.lower() == "true" and args_opt.train_data_file_path == "": + raise ValueError("'train_data_file_path' must be set when do finetune task") + if args_opt.do_eval.lower() == "true" and args_opt.eval_data_file_path == "": + raise ValueError("'eval_data_file_path' must be set when do evaluation task") + + target = args_opt.device_target + if target == "Ascend": + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id) + elif target == "GPU": + context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + if bert_net_cfg.compute_type != mstype.float32: + logger.warning('GPU only support fp32 temporarily, run with fp32.') + bert_net_cfg.compute_type = mstype.float32 + else: + raise Exception("Target error, GPU or 
Ascend is supported.") + + netwithloss = BertCLS(bert_net_cfg, True, num_labels=args_opt.num_class, dropout_prob=0.1, + assessment_method=assessment_method) + + if args_opt.do_train.lower() == "true": + ds = create_classification_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=epoch_num, + assessment_method=assessment_method, + data_file_path=args_opt.train_data_file_path, + schema_file_path=args_opt.schema_file_path) + do_train(ds, netwithloss, load_pretrain_checkpoint_path, save_finetune_checkpoint_path) + + if args_opt.do_eval.lower() == "true": + if save_finetune_checkpoint_path == "": + load_finetune_checkpoint_dir = _cur_dir + else: + load_finetune_checkpoint_dir = make_directory(save_finetune_checkpoint_path) + load_finetune_checkpoint_path = LoadNewestCkpt(load_finetune_checkpoint_dir, + ds.get_dataset_size(), epoch_num, "classifier") + + if args_opt.do_eval.lower() == "true": + ds = create_classification_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=epoch_num, + assessment_method=assessment_method, + data_file_path=args_opt.eval_data_file_path, + schema_file_path=args_opt.schema_file_path) + do_eval(ds, BertCLS, args_opt.num_class, assessment_method, load_finetune_checkpoint_path) + +if __name__ == "__main__": + run_classifier() diff --git a/model_zoo/bert/run_ner.py b/model_zoo/bert/run_ner.py new file mode 100644 index 00000000000..a61c96066e8 --- /dev/null +++ b/model_zoo/bert/run_ner.py @@ -0,0 +1,228 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +''' +Bert finetune and evaluation script. +''' + +import os +import json +import argparse +from src.bert_for_finetune import BertFinetuneCell, BertNER +from src.finetune_eval_config import optimizer_cfg, bert_net_cfg +from src.dataset import create_ner_dataset +from src.utils import make_directory, LossCallBack, LoadNewestCkpt +from src.assessment_method import Accuracy, F1, MCC, Spearman_Correlation +import mindspore.common.dtype as mstype +from mindspore import context +from mindspore import log as logger +from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell +from mindspore.nn.optim import AdamWeightDecayDynamicLR, Lamb, Momentum +from mindspore.common.tensor import Tensor +from mindspore.train.model import Model +from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, TimeMonitor +from mindspore.train.serialization import load_checkpoint, load_param_into_net + +_cur_dir = os.getcwd() + + +def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path=""): + """ do train """ + if load_checkpoint_path == "": + raise ValueError("Pretrain model missed, finetune task must load pretrain model!") + steps_per_epoch = dataset.get_dataset_size() + epoch_num = dataset.get_repeat_count() + # optimizer + if optimizer_cfg.optimizer == 'AdamWeightDecayDynamicLR': + optimizer = AdamWeightDecayDynamicLR(network.trainable_params(), + decay_steps=steps_per_epoch * epoch_num, + learning_rate=optimizer_cfg.AdamWeightDecayDynamicLR.learning_rate, + end_learning_rate=optimizer_cfg.AdamWeightDecayDynamicLR.end_learning_rate, + power=optimizer_cfg.AdamWeightDecayDynamicLR.power, + warmup_steps=int(steps_per_epoch * epoch_num * 0.1), + weight_decay=optimizer_cfg.AdamWeightDecayDynamicLR.weight_decay, + eps=optimizer_cfg.AdamWeightDecayDynamicLR.eps) 
+ elif optimizer_cfg.optimizer == 'Lamb': + optimizer = Lamb(network.trainable_params(), decay_steps=steps_per_epoch * epoch_num, + start_learning_rate=optimizer_cfg.Lamb.start_learning_rate, + end_learning_rate=optimizer_cfg.Lamb.end_learning_rate, + power=optimizer_cfg.Lamb.power, weight_decay=optimizer_cfg.Lamb.weight_decay, + warmup_steps=int(steps_per_epoch * epoch_num * 0.1), + decay_filter=optimizer_cfg.Lamb.decay_filter) + elif optimizer_cfg.optimizer == 'Momentum': + optimizer = Momentum(network.trainable_params(), learning_rate=optimizer_cfg.Momentum.learning_rate, + momentum=optimizer_cfg.Momentum.momentum) + else: + raise Exception("Optimizer not supported. support: [AdamWeightDecayDynamicLR, Lamb, Momentum]") + + # load checkpoint into network + ckpt_config = CheckpointConfig(save_checkpoint_steps=steps_per_epoch, keep_checkpoint_max=1) + ckpoint_cb = ModelCheckpoint(prefix="ner", directory=save_checkpoint_path, config=ckpt_config) + param_dict = load_checkpoint(load_checkpoint_path) + load_param_into_net(network, param_dict) + + update_cell = DynamicLossScaleUpdateCell(loss_scale_value=2**32, scale_factor=2, scale_window=1000) + netwithgrads = BertFinetuneCell(network, optimizer=optimizer, scale_update_cell=update_cell) + model = Model(netwithgrads) + callbacks = [TimeMonitor(dataset.get_dataset_size()), LossCallBack(), ckpoint_cb] + model.train(epoch_num, dataset, callbacks=callbacks) + +def eval_result_print(assessment_method="accuracy", callback=None): + """print eval result""" + if assessment_method == "accuracy": + print("acc_num {} , total_num {}, accuracy {:.6f}".format(callback.acc_num, callback.total_num, + callback.acc_num / callback.total_num)) + elif assessment_method == "f1": + print("Precision {:.6f} ".format(callback.TP / (callback.TP + callback.FP))) + print("Recall {:.6f} ".format(callback.TP / (callback.TP + callback.FN))) + print("F1 {:.6f} ".format(2 * callback.TP / (2 * callback.TP + callback.FP + callback.FN))) + elif 
assessment_method == "mcc": + print("MCC {:.6f} ".format(callback.cal())) + elif assessment_method == "spearman_correlation": + print("Spearman Correlation is {:.6f} ".format(callback.cal()[0])) + else: + raise ValueError("Assessment method not supported, support: [accuracy, f1, mcc, spearman_correlation]") + +def do_eval(dataset=None, network=None, use_crf="", num_class=2, assessment_method="accuracy", data_file="", + load_checkpoint_path="", vocab_file="", label2id_file="", tag_to_index=None): + """ do eval """ + if load_checkpoint_path == "": + raise ValueError("Finetune model missed, evaluation task must load finetune model!") + if assessment_method == "clue_benchmark": + bert_net_cfg.batch_size = 1 + net_for_pretraining = network(bert_net_cfg, False, num_class, use_crf=(use_crf.lower() == "true"), + tag_to_index=tag_to_index) + net_for_pretraining.set_train(False) + param_dict = load_checkpoint(load_checkpoint_path) + load_param_into_net(net_for_pretraining, param_dict) + model = Model(net_for_pretraining) + + if assessment_method == "clue_benchmark": + from src.cluener_evaluation import submit + submit(model=model, path=data_file, vocab_file=vocab_file, use_crf=use_crf, label2id_file=label2id_file) + else: + if assessment_method == "accuracy": + callback = Accuracy() + elif assessment_method == "f1": + callback = F1((use_crf.lower() == "true"), num_class) + elif assessment_method == "mcc": + callback = MCC() + elif assessment_method == "spearman_correlation": + callback = Spearman_Correlation() + else: + raise ValueError("Assessment method not supported, support: [accuracy, f1, mcc, spearman_correlation]") + + columns_list = ["input_ids", "input_mask", "segment_ids", "label_ids"] + for data in dataset.create_dict_iterator(): + input_data = [] + for i in columns_list: + input_data.append(Tensor(data[i])) + input_ids, input_mask, token_type_id, label_ids = input_data + logits = model.predict(input_ids, input_mask, token_type_id, label_ids) + 
callback.update(logits, label_ids) + print("==============================================================") + eval_result_print(assessment_method, callback) + print("==============================================================") + +def run_ner(): + """run ner task""" + parser = argparse.ArgumentParser(description="run classifier") + parser.add_argument("--device_target", type=str, default="Ascend", help="Device type, default is Ascend") + parser.add_argument("--assessment_method", type=str, default="accuracy", help="assessment_method include: " + "[F1, clue_benchmark], default is F1") + parser.add_argument("--do_train", type=str, default="false", help="Eable train, default is false") + parser.add_argument("--do_eval", type=str, default="false", help="Eable eval, default is false") + parser.add_argument("--use_crf", type=str, default="false", help="Use crf, default is false") + parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") + parser.add_argument("--epoch_num", type=int, default="1", help="Epoch number, default is 1.") + parser.add_argument("--num_class", type=int, default="2", help="The number of class, default is 2.") + parser.add_argument("--vocab_file_path", type=str, default="", help="Vocab file path, used in clue benchmark") + parser.add_argument("--label2id_file_path", type=str, default="", help="label2id file path, used in clue benchmark") + parser.add_argument("--save_finetune_checkpoint_path", type=str, default="", help="Save checkpoint path") + parser.add_argument("--load_pretrain_checkpoint_path", type=str, default="", help="Load checkpoint file path") + parser.add_argument("--load_finetune_checkpoint_path", type=str, default="", help="Load checkpoint file path") + parser.add_argument("--train_data_file_path", type=str, default="", + help="Data path, it is better to use absolute path") + parser.add_argument("--eval_data_file_path", type=str, default="", + help="Data path, it is better to use absolute path") + 
parser.add_argument("--schema_file_path", type=str, default="", + help="Schema path, it is better to use absolute path") + args_opt = parser.parse_args() + epoch_num = args_opt.epoch_num + assessment_method = args_opt.assessment_method.lower() + load_pretrain_checkpoint_path = args_opt.load_pretrain_checkpoint_path + save_finetune_checkpoint_path = args_opt.save_finetune_checkpoint_path + load_finetune_checkpoint_path = args_opt.load_finetune_checkpoint_path + + if args_opt.do_train.lower() == "false" and args_opt.do_eval.lower() == "false": + raise ValueError("At least one of 'do_train' or 'do_eval' must be true") + if args_opt.do_train.lower() == "true" and args_opt.train_data_file_path == "": + raise ValueError("'train_data_file_path' must be set when do finetune task") + if args_opt.do_eval.lower() == "true" and args_opt.eval_data_file_path == "": + raise ValueError("'eval_data_file_path' must be set when do evaluation task") + if args_opt.assessment_method.lower() == "clue_benchmark" and args_opt.vocab_file_path == "": + raise ValueError("'vocab_file_path' must be set to do clue benchmark") + if args_opt.use_crf.lower() == "true" and args_opt.label2id_file_path == "": + raise ValueError("'label2id_file_path' must be set to use crf") + if args_opt.assessment_method.lower() == "clue_benchmark" and args_opt.label2id_file_path == "": + raise ValueError("'label2id_file_path' must be set to do clue benchmark") + + target = args_opt.device_target + if target == "Ascend": + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id) + elif target == "GPU": + context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + if bert_net_cfg.compute_type != mstype.float32: + logger.warning('GPU only support fp32 temporarily, run with fp32.') + bert_net_cfg.compute_type = mstype.float32 + else: + raise Exception("Target error, GPU or Ascend is supported.") + + tag_to_index = None + if args_opt.use_crf.lower() == "true": + with 
open(args_opt.label2id_file_path) as json_file: + tag_to_index = json.load(json_file) + max_val = max(tag_to_index.values()) + tag_to_index[""] = max_val + 1 + tag_to_index[""] = max_val + 2 + number_labels = len(tag_to_index) + else: + number_labels = args_opt.num_class + netwithloss = BertNER(bert_net_cfg, True, num_labels=number_labels, + use_crf=(args_opt.use_crf.lower() == "true"), + tag_to_index=tag_to_index, dropout_prob=0.1) + if args_opt.do_train.lower() == "true": + ds = create_ner_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=epoch_num, + assessment_method=assessment_method, data_file_path=args_opt.train_data_file_path, + schema_file_path=args_opt.schema_file_path) + do_train(ds, netwithloss, load_pretrain_checkpoint_path, save_finetune_checkpoint_path) + + if args_opt.do_eval.lower() == "true": + if save_finetune_checkpoint_path == "": + load_finetune_checkpoint_dir = _cur_dir + else: + load_finetune_checkpoint_dir = make_directory(save_finetune_checkpoint_path) + load_finetune_checkpoint_path = LoadNewestCkpt(load_finetune_checkpoint_dir, + ds.get_dataset_size(), epoch_num, "ner") + + if args_opt.do_eval.lower() == "true": + ds = create_ner_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=epoch_num, + assessment_method=assessment_method, data_file_path=args_opt.eval_data_file_path, + schema_file_path=args_opt.schema_file_path) + do_eval(ds, BertNER, args_opt.use_crf, number_labels, assessment_method, args_opt.eval_data_file_path, + load_finetune_checkpoint_path, args_opt.vocab_file_path, args_opt.label2id_file_path, tag_to_index) + +if __name__ == "__main__": + run_ner() diff --git a/model_zoo/bert/run_pretrain.py b/model_zoo/bert/run_pretrain.py index 65768946c17..7123c942f3c 100644 --- a/model_zoo/bert/run_pretrain.py +++ b/model_zoo/bert/run_pretrain.py @@ -26,33 +26,16 @@ from mindspore import context from mindspore.train.model import Model from mindspore.train.parallel_utils import ParallelMode from mindspore.nn.wrap.loss_scale 
import DynamicLossScaleUpdateCell -from mindspore.train.callback import Callback, ModelCheckpoint, CheckpointConfig, TimeMonitor +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, TimeMonitor from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore.nn.optim import Lamb, Momentum, AdamWeightDecayDynamicLR from mindspore import log as logger from src import BertNetworkWithLoss, BertTrainOneStepCell, BertTrainOneStepWithLossScaleCell from src.dataset import create_bert_dataset from src.config import cfg, bert_net_cfg +from src.utils import LossCallBack _current_dir = os.path.dirname(os.path.realpath(__file__)) -class LossCallBack(Callback): - """ - Monitor the loss in training. - If the loss in NAN or INF terminating training. - Note: - if per_print_times is 0 do not print loss. - Args: - per_print_times (int): Print loss every times. Default: 1. - """ - def __init__(self, per_print_times=1): - super(LossCallBack, self).__init__() - if not isinstance(per_print_times, int) or per_print_times < 0: - raise ValueError("print_step must be int and >= 0") - self._per_print_times = per_print_times - def step_end(self, run_context): - cb_params = run_context.original_args() - print("epoch: {}, step: {}, outputs are {}".format(cb_params.cur_epoch_num, cb_params.cur_step_num, - str(cb_params.net_outputs))) def run_pretrain(): """pre-train bert_clue""" diff --git a/model_zoo/bert/run_squad.py b/model_zoo/bert/run_squad.py new file mode 100644 index 00000000000..083cedac1dc --- /dev/null +++ b/model_zoo/bert/run_squad.py @@ -0,0 +1,204 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +''' +Bert finetune and evaluation script. +''' +import os +import argparse +import collections +from src.bert_for_finetune import BertSquadCell, BertSquad +from src.finetune_eval_config import optimizer_cfg, bert_net_cfg +from src.dataset import create_squad_dataset +from src import tokenization +from src.create_squad_data import read_squad_examples, convert_examples_to_features +from src.run_squad import write_predictions +from src.utils import make_directory, LossCallBack, LoadNewestCkpt +import mindspore.common.dtype as mstype +from mindspore import context +from mindspore import log as logger +from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell +from mindspore.nn.optim import AdamWeightDecayDynamicLR, Lamb, Momentum +from mindspore.common.tensor import Tensor +from mindspore.train.model import Model +from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, TimeMonitor +from mindspore.train.serialization import load_checkpoint, load_param_into_net + +_cur_dir = os.getcwd() + +def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path=""): + """ do train """ + if load_checkpoint_path == "": + raise ValueError("Pretrain model missed, finetune task must load pretrain model!") + steps_per_epoch = dataset.get_dataset_size() + epoch_num = dataset.get_repeat_count() + # optimizer + if optimizer_cfg.optimizer == 'AdamWeightDecayDynamicLR': + optimizer = AdamWeightDecayDynamicLR(network.trainable_params(), + 
decay_steps=steps_per_epoch * epoch_num, + learning_rate=optimizer_cfg.AdamWeightDecayDynamicLR.learning_rate, + end_learning_rate=optimizer_cfg.AdamWeightDecayDynamicLR.end_learning_rate, + power=optimizer_cfg.AdamWeightDecayDynamicLR.power, + warmup_steps=int(steps_per_epoch * epoch_num * 0.1), + weight_decay=optimizer_cfg.AdamWeightDecayDynamicLR.weight_decay, + eps=optimizer_cfg.AdamWeightDecayDynamicLR.eps) + elif optimizer_cfg.optimizer == 'Lamb': + optimizer = Lamb(network.trainable_params(), decay_steps=steps_per_epoch * epoch_num, + start_learning_rate=optimizer_cfg.Lamb.start_learning_rate, + end_learning_rate=optimizer_cfg.Lamb.end_learning_rate, + power=optimizer_cfg.Lamb.power, weight_decay=optimizer_cfg.Lamb.weight_decay, + warmup_steps=int(steps_per_epoch * epoch_num * 0.1), + decay_filter=optimizer_cfg.Lamb.decay_filter) + elif optimizer_cfg.optimizer == 'Momentum': + optimizer = Momentum(network.trainable_params(), learning_rate=optimizer_cfg.Momentum.learning_rate, + momentum=optimizer_cfg.Momentum.momentum) + else: + raise Exception("Optimizer not supported. 
support: [AdamWeightDecayDynamicLR, Lamb, Momentum]") + + # load checkpoint into network + ckpt_config = CheckpointConfig(save_checkpoint_steps=steps_per_epoch, keep_checkpoint_max=1) + ckpoint_cb = ModelCheckpoint(prefix="squad", directory=save_checkpoint_path, config=ckpt_config) + param_dict = load_checkpoint(load_checkpoint_path) + load_param_into_net(network, param_dict) + + update_cell = DynamicLossScaleUpdateCell(loss_scale_value=2**32, scale_factor=2, scale_window=1000) + netwithgrads = BertSquadCell(network, optimizer=optimizer, scale_update_cell=update_cell) + model = Model(netwithgrads) + callbacks = [TimeMonitor(dataset.get_dataset_size()), LossCallBack(), ckpoint_cb] + model.train(epoch_num, dataset, callbacks=callbacks) + + +def do_eval(dataset=None, vocab_file="", eval_json="", load_checkpoint_path="", seq_length=384): + """ do eval """ + if load_checkpoint_path == "": + raise ValueError("Finetune model missed, evaluation task must load finetune model!") + tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=True) + eval_examples = read_squad_examples(eval_json, False) + eval_features = convert_examples_to_features( + examples=eval_examples, + tokenizer=tokenizer, + max_seq_length=seq_length, + doc_stride=128, + max_query_length=64, + is_training=False, + output_fn=None, + verbose_logging=False) + + net = BertSquad(bert_net_cfg, False, 2) + net.set_train(False) + param_dict = load_checkpoint(load_checkpoint_path) + load_param_into_net(net, param_dict) + model = Model(net) + output = [] + RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"]) + columns_list = ["input_ids", "input_mask", "segment_ids", "unique_ids"] + for data in dataset.create_dict_iterator(): + input_data = [] + for i in columns_list: + input_data.append(Tensor(data[i])) + input_ids, input_mask, segment_ids, unique_ids = input_data + start_positions = Tensor([1], mstype.float32) + end_positions = Tensor([1], mstype.float32) 
+ is_impossible = Tensor([1], mstype.float32) + logits = model.predict(input_ids, input_mask, segment_ids, start_positions, + end_positions, unique_ids, is_impossible) + ids = logits[0].asnumpy() + start = logits[1].asnumpy() + end = logits[2].asnumpy() + + for i in range(bert_net_cfg.batch_size): + unique_id = int(ids[i]) + start_logits = [float(x) for x in start[i].flat] + end_logits = [float(x) for x in end[i].flat] + output.append(RawResult( + unique_id=unique_id, + start_logits=start_logits, + end_logits=end_logits)) + write_predictions(eval_examples, eval_features, output, 20, 30, True, "./predictions.json", None, None) + +def run_squad(): + """run squad task""" + parser = argparse.ArgumentParser(description="run classifier") + parser.add_argument("--device_target", type=str, default="Ascend", help="Device type, default is Ascend") + parser.add_argument("--do_train", type=str, default="false", help="Eable train, default is false") + parser.add_argument("--do_eval", type=str, default="false", help="Eable eval, default is false") + parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") + parser.add_argument("--epoch_num", type=int, default="1", help="Epoch number, default is 1.") + parser.add_argument("--num_class", type=int, default="2", help="The number of class, default is 2.") + parser.add_argument("--vocab_file_path", type=str, default="", help="Vocab file path") + parser.add_argument("--eval_json_path", type=str, default="", help="Evaluation json file path, can be eval.json") + parser.add_argument("--save_finetune_checkpoint_path", type=str, default="", help="Save checkpoint path") + parser.add_argument("--load_pretrain_checkpoint_path", type=str, default="", help="Load checkpoint file path") + parser.add_argument("--load_finetune_checkpoint_path", type=str, default="", help="Load checkpoint file path") + parser.add_argument("--train_data_file_path", type=str, default="", + help="Data path, it is better to use absolute 
path") + parser.add_argument("--eval_data_file_path", type=str, default="", + help="Data path, it is better to use absolute path") + parser.add_argument("--schema_file_path", type=str, default="", + help="Schema path, it is better to use absolute path") + args_opt = parser.parse_args() + epoch_num = args_opt.epoch_num + load_pretrain_checkpoint_path = args_opt.load_pretrain_checkpoint_path + save_finetune_checkpoint_path = args_opt.save_finetune_checkpoint_path + load_finetune_checkpoint_path = args_opt.load_finetune_checkpoint_path + + if args_opt.do_train.lower() == "false" and args_opt.do_eval.lower() == "false": + raise ValueError("At least one of 'do_train' or 'do_eval' must be true") + if args_opt.do_train.lower() == "true" and args_opt.train_data_file_path == "": + raise ValueError("'train_data_file_path' must be set when do finetune task") + if args_opt.do_eval.lower() == "true": + if args_opt.eval_data_file_path == "": + raise ValueError("'eval_data_file_path' must be set when do evaluation task") + if args_opt.vocab_file_path == "": + raise ValueError("'vocab_file_path' must be set when do evaluation task") + if args_opt.eval_json_path == "": + raise ValueError("'tokenization_file_path' must be set when do evaluation task") + + + target = args_opt.device_target + if target == "Ascend": + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id) + elif target == "GPU": + context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + if bert_net_cfg.compute_type != mstype.float32: + logger.warning('GPU only support fp32 temporarily, run with fp32.') + bert_net_cfg.compute_type = mstype.float32 + else: + raise Exception("Target error, GPU or Ascend is supported.") + + netwithloss = BertSquad(bert_net_cfg, True, 2, dropout_prob=0.1) + + if args_opt.do_train.lower() == "true": + ds = create_squad_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=epoch_num, + data_file_path=args_opt.train_data_file_path, + 
schema_file_path=args_opt.schema_file_path) + do_train(ds, netwithloss, load_pretrain_checkpoint_path, save_finetune_checkpoint_path) + if args_opt.do_eval.lower() == "true": + if save_finetune_checkpoint_path == "": + load_finetune_checkpoint_dir = _cur_dir + else: + load_finetune_checkpoint_dir = make_directory(save_finetune_checkpoint_path) + load_finetune_checkpoint_path = LoadNewestCkpt(load_finetune_checkpoint_dir, + ds.get_dataset_size(), epoch_num, "squad") + + if args_opt.do_eval.lower() == "true": + ds = create_squad_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=epoch_num, + data_file_path=args_opt.eval_data_file_path, + schema_file_path=args_opt.schema_file_path, is_training=False) + do_eval(ds, args_opt.vocab_file_path, args_opt.eval_json_path, + load_finetune_checkpoint_path, bert_net_cfg.seq_length) + +if __name__ == "__main__": + run_squad() diff --git a/model_zoo/bert/scripts/run_classifier.sh b/model_zoo/bert/scripts/run_classifier.sh new file mode 100644 index 00000000000..275324b9508 --- /dev/null +++ b/model_zoo/bert/scripts/run_classifier.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +echo "==============================================================================================================" +echo "Please run the scipt as: " +echo "bash scripts/run_classifier.sh" +echo "for example: bash scripts/run_classifier.sh" +echo "assessment_method include: [MCC, Spearman_correlation ,Accuracy]" +echo "==============================================================================================================" + +mkdir -p ms_log +CUR_DIR=`pwd` +PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd) +export GLOG_log_dir=${CUR_DIR}/ms_log +export GLOG_logtostderr=0 +python ${PROJECT_DIR}/../run_classifier.py \ + --device_target="Ascend" \ + --do_train="true" \ + --do_eval="false" \ + --assessment_method="Accuracy" \ + --device_id=0 \ + --epoch_num=1 \ + --num_class=2 \ + --save_finetune_checkpoint_path="" \ + --load_pretrain_checkpoint_path="" \ + --load_finetune_checkpoint_path="" \ + --train_data_file_path="" \ + --eval_data_file_path="" \ + --schema_file_path="" > log.txt 2>&1 & diff --git a/model_zoo/bert/scripts/run_distribute_pretrain.sh b/model_zoo/bert/scripts/run_distribute_pretrain.sh index 5a9f8735aa1..eb3a0979d15 100644 --- a/model_zoo/bert/scripts/run_distribute_pretrain.sh +++ b/model_zoo/bert/scripts/run_distribute_pretrain.sh @@ -24,8 +24,7 @@ echo "========================================================================== EPOCH_SIZE=$2 DATA_DIR=$3 SCHEMA_DIR=$4 - -export MINDSPORE_HCCL_CONFIG_PATH=$5 +PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd) export RANK_TABLE_FILE=$5 export RANK_SIZE=$1 cores=`cat /proc/cpuinfo|grep "processor" |wc -l` @@ -54,7 +53,7 @@ do export GLOG_log_dir=${CUR_DIR}/ms_log export GLOG_logtostderr=0 env > env.log - taskset -c $cmdopt python ../run_pretrain.py \ + taskset -c $cmdopt python ${PROJECT_DIR}/../run_pretrain.py \ --distribute="true" \ --epoch_size=$EPOCH_SIZE \ --device_id=$DEVICE_ID \ diff --git 
a/model_zoo/bert/scripts/run_ner.sh b/model_zoo/bert/scripts/run_ner.sh new file mode 100644 index 00000000000..ae401b2462d --- /dev/null +++ b/model_zoo/bert/scripts/run_ner.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +echo "==============================================================================================================" +echo "Please run the scipt as: " +echo "bash scripts/run_ner.sh" +echo "for example: bash scripts/run_ner.sh" +echo "assessment_method include: [F1, clue_benchmark]" +echo "==============================================================================================================" + +mkdir -p ms_log +CUR_DIR=`pwd` +PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd) +export GLOG_log_dir=${CUR_DIR}/ms_log +export GLOG_logtostderr=0 +python ${PROJECT_DIR}/../run_ner.py \ + --device_target="Ascend" \ + --do_train="true" \ + --do_eval="false" \ + --assessment_method="F1" \ + --use_crf="false" \ + --device_id=0 \ + --epoch_num=1 \ + --num_class=2 \ + --vocab_file_path="" \ + --label2id_file_path="" \ + --save_finetune_checkpoint_path="" \ + --load_pretrain_checkpoint_path="" \ + --load_finetune_checkpoint_path="" \ + --train_data_file_path="" \ + --eval_data_file_path="" \ + --schema_file_path="" > log.txt 2>&1 & diff --git a/model_zoo/bert/scripts/run_squad.sh 
b/model_zoo/bert/scripts/run_squad.sh new file mode 100644 index 00000000000..a33950cadb6 --- /dev/null +++ b/model_zoo/bert/scripts/run_squad.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +echo "==============================================================================================================" +echo "Please run the scipt as: " +echo "bash scripts/run_squad.sh" +echo "for example: bash scripts/run_squad.sh" +echo "assessment_method include: [Accuracy]" +echo "==============================================================================================================" + +mkdir -p ms_log +CUR_DIR=`pwd` +PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd) +export GLOG_log_dir=${CUR_DIR}/ms_log +export GLOG_logtostderr=0 +python ${PROJECT_DIR}/../run_squad.py \ + --device_target="Ascend" \ + --do_train="true" \ + --do_eval="false" \ + --device_id=0 \ + --epoch_num=1 \ + --num_class=2 \ + --vocab_file_path="" \ + --eval_json_path="" \ + --save_finetune_checkpoint_path="" \ + --load_pretrain_checkpoint_path="" \ + --load_finetune_checkpoint_path="" \ + --train_data_file_path="" \ + --eval_data_file_path="" \ + --schema_file_path="" > log.txt 2>&1 & diff --git a/model_zoo/bert/scripts/run_standalone_pretrain.sh b/model_zoo/bert/scripts/run_standalone_pretrain.sh index 3cd9545f7f7..f59eb69601f 100644 --- 
a/model_zoo/bert/scripts/run_standalone_pretrain.sh +++ b/model_zoo/bert/scripts/run_standalone_pretrain.sh @@ -26,10 +26,11 @@ DATA_DIR=$3 SCHEMA_DIR=$4 mkdir -p ms_log +PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd) CUR_DIR=`pwd` export GLOG_log_dir=${CUR_DIR}/ms_log export GLOG_logtostderr=0 -python run_pretrain.py \ +python ${PROJECT_DIR}/../run_pretrain.py \ --distribute="false" \ --epoch_size=$EPOCH_SIZE \ --device_id=$DEVICE_ID \ diff --git a/model_zoo/bert/squadeval.py b/model_zoo/bert/squadeval.py deleted file mode 100644 index 49027acd6dc..00000000000 --- a/model_zoo/bert/squadeval.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Evaluation script for SQuAD task""" - -import os -import collections -import mindspore.dataset as de -import mindspore.dataset.transforms.c_transforms as C -import mindspore.common.dtype as mstype -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.train.model import Model -from mindspore.train.serialization import load_checkpoint, load_param_into_net -from src import tokenization -from src.evaluation_config import cfg, bert_net_cfg -from src.utils import BertSquad -from src.create_squad_data import read_squad_examples, convert_examples_to_features -from src.run_squad import write_predictions - -def get_squad_dataset(batch_size=1, repeat_count=1, distribute_file=''): - """get SQuAD dataset from tfrecord""" - ds = de.TFRecordDataset([cfg.data_file], cfg.schema_file, columns_list=["input_ids", "input_mask", - "segment_ids", "unique_ids"], - shuffle=False) - type_cast_op = C.TypeCast(mstype.int32) - ds = ds.map(input_columns="segment_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_ids", operations=type_cast_op) - ds = ds.map(input_columns="input_mask", operations=type_cast_op) - ds = ds.repeat(repeat_count) - ds = ds.batch(batch_size, drop_remainder=True) - return ds - -def test_eval(): - """Evaluation function for SQuAD task""" - tokenizer = tokenization.FullTokenizer(vocab_file="./vocab.txt", do_lower_case=True) - input_file = "dataset/v1.1/dev-v1.1.json" - eval_examples = read_squad_examples(input_file, False) - eval_features = convert_examples_to_features( - examples=eval_examples, - tokenizer=tokenizer, - max_seq_length=384, - doc_stride=128, - max_query_length=64, - is_training=False, - output_fn=None, - verbose_logging=False) - - device_id = int(os.getenv('DEVICE_ID')) - context.set_context(mode=context.GRAPH_MODE, device_target='Ascend', device_id=device_id) - dataset = get_squad_dataset(bert_net_cfg.batch_size, 1) - 
net = BertSquad(bert_net_cfg, False, 2) - net.set_train(False) - param_dict = load_checkpoint(cfg.finetune_ckpt) - load_param_into_net(net, param_dict) - model = Model(net) - output = [] - RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"]) - columns_list = ["input_ids", "input_mask", "segment_ids", "unique_ids"] - for data in dataset.create_dict_iterator(): - input_data = [] - for i in columns_list: - input_data.append(Tensor(data[i])) - input_ids, input_mask, segment_ids, unique_ids = input_data - start_positions = Tensor([1], mstype.float32) - end_positions = Tensor([1], mstype.float32) - is_impossible = Tensor([1], mstype.float32) - logits = model.predict(input_ids, input_mask, segment_ids, start_positions, - end_positions, unique_ids, is_impossible) - ids = logits[0].asnumpy() - start = logits[1].asnumpy() - end = logits[2].asnumpy() - - for i in range(bert_net_cfg.batch_size): - unique_id = int(ids[i]) - start_logits = [float(x) for x in start[i].flat] - end_logits = [float(x) for x in end[i].flat] - output.append(RawResult( - unique_id=unique_id, - start_logits=start_logits, - end_logits=end_logits)) - write_predictions(eval_examples, eval_features, output, 20, 30, True, "./predictions.json", - None, None, False, False) - - -if __name__ == "__main__": - test_eval() diff --git a/model_zoo/bert/src/assessment_method.py b/model_zoo/bert/src/assessment_method.py new file mode 100644 index 00000000000..ca6579cabf5 --- /dev/null +++ b/model_zoo/bert/src/assessment_method.py @@ -0,0 +1,134 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +''' +Bert evaluation assessment method script. +''' +import math +import numpy as np +from .CRF import postprocess + +class Accuracy(): + ''' + calculate accuracy + ''' + def __init__(self): + self.acc_num = 0 + self.total_num = 0 + def update(self, logits, labels): + labels = labels.asnumpy() + labels = np.reshape(labels, -1) + logits = logits.asnumpy() + logit_id = np.argmax(logits, axis=-1) + self.acc_num += np.sum(labels == logit_id) + self.total_num += len(labels) + print("=========================accuracy is ", self.acc_num / self.total_num) + +class F1(): + ''' + calculate F1 score + ''' + def __init__(self, use_crf=False, num_labels=2): + self.TP = 0 + self.FP = 0 + self.FN = 0 + self.use_crf = use_crf + self.num_labels = num_labels + + def update(self, logits, labels): + ''' + update F1 score + ''' + labels = labels.asnumpy() + labels = np.reshape(labels, -1) + if self.use_crf: + backpointers, best_tag_id = logits + best_path = postprocess(backpointers, best_tag_id) + logit_id = [] + for ele in best_path: + logit_id.extend(ele) + else: + logits = logits.asnumpy() + logit_id = np.argmax(logits, axis=-1) + logit_id = np.reshape(logit_id, -1) + pos_eva = np.isin(logit_id, [i for i in range(1, self.num_labels)]) + pos_label = np.isin(labels, [i for i in range(1, self.num_labels)]) + self.TP += np.sum(pos_eva&pos_label) + self.FP += np.sum(pos_eva&(~pos_label)) + self.FN += np.sum((~pos_eva)&pos_label) + +class MCC(): + ''' + Calculate Matthews Correlation Coefficient + 
''' + def __init__(self): + self.TP = 0 + self.FP = 0 + self.FN = 0 + self.TN = 0 + def update(self, logits, labels): + ''' + MCC update + ''' + labels = labels.asnumpy() + labels = np.reshape(labels, -1) + labels = labels.astype(np.bool) + logits = logits.asnumpy() + logit_id = np.argmax(logits, axis=-1) + logit_id = np.reshape(logit_id, -1) + logit_id = logit_id.astype(np.bool) + ornot = logit_id ^ labels + + self.TP += (~ornot & labels).sum() + self.FP += (ornot & ~labels).sum() + self.FN += (ornot & labels).sum() + self.TN += (~ornot & ~labels).sum() + + def cal(self): + mcc = (self.TP*self.TN - self.FP*self.FN)/math.sqrt((self.TP+self.FP)*(self.TP+self.FN) * + (self.TN+self.FP)*(self.TN+self.FN)) + return mcc + +class Spearman_Correlation(): + ''' + Calculate Spearman Correlation Coefficient + ''' + def __init__(self): + self.label = [] + self.logit = [] + + def update(self, logits, labels): + labels = labels.asnumpy() + labels = np.reshape(labels, -1) + logits = logits.asnumpy() + logits = np.reshape(logits, -1) + self.label.append(labels) + self.logit.append(logits) + + def cal(self): + ''' + Calculate Spearman Correlation + ''' + label = np.concatenate(self.label) + logit = np.concatenate(self.logit) + sort_label = label.argsort()[::-1] + sort_logit = logit.argsort()[::-1] + n = len(label) + d_acc = 0 + for i in range(n): + d = np.where(sort_label == i)[0] - np.where(sort_logit == i)[0] + d_acc += d**2 + ps = 1 - 6*d_acc/n/(n**2-1) + return ps diff --git a/model_zoo/bert/src/bert_for_finetune.py b/model_zoo/bert/src/bert_for_finetune.py new file mode 100644 index 00000000000..32ac0823b97 --- /dev/null +++ b/model_zoo/bert/src/bert_for_finetune.py @@ -0,0 +1,327 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +''' +Bert for finetune script. +''' + +import mindspore.nn as nn +from mindspore.ops import operations as P +from mindspore.ops import functional as F +from mindspore.ops import composite as C +from mindspore.common.tensor import Tensor +from mindspore.common.parameter import Parameter, ParameterTuple +from mindspore.common import dtype as mstype +from mindspore.nn.wrap.grad_reducer import DistributedGradReducer +from mindspore.train.parallel_utils import ParallelMode +from mindspore.communication.management import get_group_size +from mindspore import context +from .bert_for_pre_training import clip_grad +from .finetune_eval_model import BertCLSModel, BertNERModel, BertSquadModel +from .utils import CrossEntropyCalculation + + +GRADIENT_CLIP_TYPE = 1 +GRADIENT_CLIP_VALUE = 1.0 +grad_scale = C.MultitypeFuncGraph("grad_scale") +reciprocal = P.Reciprocal() +@grad_scale.register("Tensor", "Tensor") +def tensor_grad_scale(scale, grad): + return grad * reciprocal(scale) + +_grad_overflow = C.MultitypeFuncGraph("_grad_overflow") +grad_overflow = P.FloatStatus() +@_grad_overflow.register("Tensor") +def _tensor_grad_overflow(grad): + return grad_overflow(grad) + +class BertFinetuneCell(nn.Cell): + """ + Especifically defined for finetuning where only four inputs tensor are needed. 
+ """ + def __init__(self, network, optimizer, scale_update_cell=None): + + super(BertFinetuneCell, self).__init__(auto_prefix=False) + self.network = network + self.weights = ParameterTuple(network.trainable_params()) + self.optimizer = optimizer + self.grad = C.GradOperation('grad', + get_by_list=True, + sens_param=True) + self.reducer_flag = False + self.allreduce = P.AllReduce() + self.parallel_mode = context.get_auto_parallel_context("parallel_mode") + if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]: + self.reducer_flag = True + self.grad_reducer = None + if self.reducer_flag: + mean = context.get_auto_parallel_context("mirror_mean") + degree = get_group_size() + self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree) + self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE) + self.cast = P.Cast() + self.gpu_target = False + if context.get_context("device_target") == "GPU": + self.gpu_target = True + self.float_status = P.FloatStatus() + self.addn = P.AddN() + self.reshape = P.Reshape() + else: + self.alloc_status = P.NPUAllocFloatStatus() + self.get_status = P.NPUGetFloatStatus() + self.clear_before_grad = P.NPUClearFloatStatus() + self.reduce_sum = P.ReduceSum(keep_dims=False) + self.depend_parameter_use = P.ControlDepend(depend_mode=1) + self.base = Tensor(1, mstype.float32) + self.less_equal = P.LessEqual() + self.hyper_map = C.HyperMap() + self.loss_scale = None + self.loss_scaling_manager = scale_update_cell + if scale_update_cell: + self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32), + name="loss_scale") + + def construct(self, + input_ids, + input_mask, + token_type_id, + label_ids, + sens=None): + + + weights = self.weights + init = False + loss = self.network(input_ids, + input_mask, + token_type_id, + label_ids) + if sens is None: + scaling_sens = self.loss_scale + else: + scaling_sens = sens + + if not self.gpu_target: + init = 
self.alloc_status() + clear_before_grad = self.clear_before_grad(init) + F.control_depend(loss, init) + self.depend_parameter_use(clear_before_grad, scaling_sens) + grads = self.grad(self.network, weights)(input_ids, + input_mask, + token_type_id, + label_ids, + self.cast(scaling_sens, + mstype.float32)) + grads = self.hyper_map(F.partial(grad_scale, scaling_sens), grads) + grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) + if self.reducer_flag: + grads = self.grad_reducer(grads) + if not self.gpu_target: + flag = self.get_status(init) + flag_sum = self.reduce_sum(init, (0,)) + F.control_depend(grads, flag) + F.control_depend(flag, flag_sum) + else: + flag_sum = self.hyper_map(F.partial(_grad_overflow), grads) + flag_sum = self.addn(flag_sum) + flag_sum = self.reshape(flag_sum, (())) + if self.is_distributed: + flag_reduce = self.allreduce(flag_sum) + cond = self.less_equal(self.base, flag_reduce) + else: + cond = self.less_equal(self.base, flag_sum) + overflow = cond + if sens is None: + overflow = self.loss_scaling_manager(self.loss_scale, cond) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond) + return F.depend(ret, succ) + +class BertSquadCell(nn.Cell): + """ + specifically defined for finetuning where only four inputs tensor are needed. 
+ """ + def __init__(self, network, optimizer, scale_update_cell=None): + super(BertSquadCell, self).__init__(auto_prefix=False) + self.network = network + self.weights = ParameterTuple(network.trainable_params()) + self.optimizer = optimizer + self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True) + self.reducer_flag = False + self.allreduce = P.AllReduce() + self.parallel_mode = context.get_auto_parallel_context("parallel_mode") + if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]: + self.reducer_flag = True + self.grad_reducer = None + if self.reducer_flag: + mean = context.get_auto_parallel_context("mirror_mean") + degree = get_group_size() + self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree) + self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE) + self.cast = P.Cast() + self.alloc_status = P.NPUAllocFloatStatus() + self.get_status = P.NPUGetFloatStatus() + self.clear_before_grad = P.NPUClearFloatStatus() + self.reduce_sum = P.ReduceSum(keep_dims=False) + self.depend_parameter_use = P.ControlDepend(depend_mode=1) + self.base = Tensor(1, mstype.float32) + self.less_equal = P.LessEqual() + self.hyper_map = C.HyperMap() + self.loss_scale = None + self.loss_scaling_manager = scale_update_cell + if scale_update_cell: + self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32), + name="loss_scale") + def construct(self, + input_ids, + input_mask, + token_type_id, + start_position, + end_position, + unique_id, + is_impossible, + sens=None): + weights = self.weights + init = self.alloc_status() + loss = self.network(input_ids, + input_mask, + token_type_id, + start_position, + end_position, + unique_id, + is_impossible) + if sens is None: + scaling_sens = self.loss_scale + else: + scaling_sens = sens + grads = self.grad(self.network, weights)(input_ids, + input_mask, + token_type_id, + start_position, + end_position, + unique_id, + 
is_impossible, + self.cast(scaling_sens, + mstype.float32)) + clear_before_grad = self.clear_before_grad(init) + F.control_depend(loss, init) + self.depend_parameter_use(clear_before_grad, scaling_sens) + grads = self.hyper_map(F.partial(grad_scale, scaling_sens), grads) + grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) + if self.reducer_flag: + grads = self.grad_reducer(grads) + flag = self.get_status(init) + flag_sum = self.reduce_sum(init, (0,)) + if self.is_distributed: + flag_reduce = self.allreduce(flag_sum) + cond = self.less_equal(self.base, flag_reduce) + else: + cond = self.less_equal(self.base, flag_sum) + F.control_depend(grads, flag) + F.control_depend(flag, flag_sum) + overflow = cond + if sens is None: + overflow = self.loss_scaling_manager(self.loss_scale, cond) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond) + return F.depend(ret, succ) + +class BertCLS(nn.Cell): + """ + Train interface for classification finetuning task. + """ + def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False, + assessment_method=""): + super(BertCLS, self).__init__() + self.bert = BertCLSModel(config, is_training, num_labels, dropout_prob, use_one_hot_embeddings, + assessment_method) + self.loss = CrossEntropyCalculation(is_training) + self.num_labels = num_labels + self.assessment_method = assessment_method + self.is_training = is_training + def construct(self, input_ids, input_mask, token_type_id, label_ids): + logits = self.bert(input_ids, input_mask, token_type_id) + if self.assessment_method == "spearman_correlation": + if self.is_training: + loss = self.loss(logits, label_ids) + else: + loss = logits + else: + loss = self.loss(logits, label_ids, self.num_labels) + return loss + + +class BertNER(nn.Cell): + """ + Train interface for sequence labeling finetuning task. 
+ """ + def __init__(self, config, is_training, num_labels=11, use_crf=False, tag_to_index=None, dropout_prob=0.0, + use_one_hot_embeddings=False): + super(BertNER, self).__init__() + self.bert = BertNERModel(config, is_training, num_labels, use_crf, dropout_prob, use_one_hot_embeddings) + if use_crf: + if not tag_to_index: + raise Exception("The dict for tag-index mapping should be provided for CRF.") + from src.CRF import CRF + self.loss = CRF(tag_to_index, config.batch_size, config.seq_length, is_training) + else: + self.loss = CrossEntropyCalculation(is_training) + self.num_labels = num_labels + self.use_crf = use_crf + def construct(self, input_ids, input_mask, token_type_id, label_ids): + logits = self.bert(input_ids, input_mask, token_type_id) + if self.use_crf: + loss = self.loss(logits, label_ids) + else: + loss = self.loss(logits, label_ids, self.num_labels) + return loss + +class BertSquad(nn.Cell): + ''' + Train interface for SQuAD finetuning task. + ''' + def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False): + super(BertSquad, self).__init__() + self.bert = BertSquadModel(config, is_training, num_labels, dropout_prob, use_one_hot_embeddings) + self.loss = CrossEntropyCalculation(is_training) + self.num_labels = num_labels + self.seq_length = config.seq_length + self.is_training = is_training + self.total_num = Parameter(Tensor([0], mstype.float32), name='total_num') + self.start_num = Parameter(Tensor([0], mstype.float32), name='start_num') + self.end_num = Parameter(Tensor([0], mstype.float32), name='end_num') + self.sum = P.ReduceSum() + self.equal = P.Equal() + self.argmax = P.ArgMaxWithValue(axis=1) + self.squeeze = P.Squeeze(axis=-1) + + def construct(self, input_ids, input_mask, token_type_id, start_position, end_position, unique_id, is_impossible): + logits = self.bert(input_ids, input_mask, token_type_id) + if self.is_training: + unstacked_logits_0 = self.squeeze(logits[:, :, 0:1]) + 
unstacked_logits_1 = self.squeeze(logits[:, :, 1:2]) + start_loss = self.loss(unstacked_logits_0, start_position, self.seq_length) + end_loss = self.loss(unstacked_logits_1, end_position, self.seq_length) + total_loss = (start_loss + end_loss) / 2.0 + else: + start_logits = self.squeeze(logits[:, :, 0:1]) + end_logits = self.squeeze(logits[:, :, 1:2]) + total_loss = (unique_id, start_logits, end_logits) + return total_loss diff --git a/model_zoo/bert/src/clue_classification_dataset_process.py b/model_zoo/bert/src/clue_classification_dataset_process.py new file mode 100755 index 00000000000..1e27fe03529 --- /dev/null +++ b/model_zoo/bert/src/clue_classification_dataset_process.py @@ -0,0 +1,153 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +""" +sample script of processing CLUE classification dataset using mindspore.dataset.text for fine-tuning bert +""" + +import os +import numpy as np + +import mindspore.common.dtype as mstype +import mindspore.dataset as ds +import mindspore.dataset.text as text +import mindspore.dataset.transforms.c_transforms as ops + + +def process_tnews_clue_dataset(data_dir, label_list, bert_vocab_path, + data_usage='train', shuffle_dataset=False, max_seq_len=128, batch_size=64): + """Process TNEWS dataset""" + ### Loading TNEWS from CLUEDataset + assert data_usage in ['train', 'eval', 'test'] + if data_usage == 'train': + dataset = ds.CLUEDataset(os.path.join(data_dir, "train.json"), task='TNEWS', + usage=data_usage, shuffle=shuffle_dataset) + elif data_usage == 'eval': + dataset = ds.CLUEDataset(os.path.join(data_dir, "dev.json"), task='TNEWS', + usage=data_usage, shuffle=shuffle_dataset) + else: + dataset = ds.CLUEDataset(os.path.join(data_dir, "test.json"), task='TNEWS', + usage=data_usage, shuffle=shuffle_dataset) + ### Processing label + if data_usage == 'test': + dataset = dataset.map(input_columns=["id"], output_columns=["id", "label_id"], + columns_order=["id", "label_id", "sentence"], operations=ops.Duplicate()) + dataset = dataset.map(input_columns=["label_id"], operations=ops.Fill(0)) + else: + label_vocab = text.Vocab.from_list(label_list) + label_lookup = text.Lookup(label_vocab) + dataset = dataset.map(input_columns="label_desc", output_columns="label_id", operations=label_lookup) + ### Processing sentence + vocab = text.Vocab.from_file(bert_vocab_path) + tokenizer = text.BertTokenizer(vocab, lower_case=True) + lookup = text.Lookup(vocab, unknown_token='[UNK]') + dataset = dataset.map(input_columns=["sentence"], operations=tokenizer) + dataset = dataset.map(input_columns=["sentence"], operations=ops.Slice(slice(0, max_seq_len))) + dataset = dataset.map(input_columns=["sentence"], 
+ operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'), + append=np.array(["[SEP]"], dtype='S'))) + dataset = dataset.map(input_columns=["sentence"], output_columns=["text_ids"], operations=lookup) + dataset = dataset.map(input_columns=["text_ids"], operations=ops.PadEnd([max_seq_len], 0)) + dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "mask_ids"], + columns_order=["label_id", "text_ids", "mask_ids"], operations=ops.Duplicate()) + dataset = dataset.map(input_columns=["mask_ids"], operations=ops.Mask(ops.Relational.NE, 0, mstype.int32)) + dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "segment_ids"], + columns_order=["label_id", "text_ids", "mask_ids", "segment_ids"], operations=ops.Duplicate()) + dataset = dataset.map(input_columns=["segment_ids"], operations=ops.Fill(0)) + dataset = dataset.batch(batch_size) + label = [] + text_ids = [] + mask_ids = [] + segment_ids = [] + for data in dataset: + label.append(data[0]) + text_ids.append(data[1]) + mask_ids.append(data[2]) + segment_ids.append(data[3]) + return label, text_ids, mask_ids, segment_ids + + +def process_cmnli_clue_dataset(data_dir, label_list, bert_vocab_path, + data_usage='train', shuffle_dataset=False, max_seq_len=128, batch_size=64): + """Process CMNLI dataset""" + ### Loading CMNLI from CLUEDataset + assert data_usage in ['train', 'eval', 'test'] + if data_usage == 'train': + dataset = ds.CLUEDataset(os.path.join(data_dir, "train.json"), task='CMNLI', + usage=data_usage, shuffle=shuffle_dataset) + elif data_usage == 'eval': + dataset = ds.CLUEDataset(os.path.join(data_dir, "dev.json"), task='CMNLI', + usage=data_usage, shuffle=shuffle_dataset) + else: + dataset = ds.CLUEDataset(os.path.join(data_dir, "test.json"), task='CMNLI', + usage=data_usage, shuffle=shuffle_dataset) + ### Processing label + if data_usage == 'test': + dataset = dataset.map(input_columns=["id"], output_columns=["id", "label_id"], + columns_order=["id", 
"label_id", "sentence1", "sentence2"], operations=ops.Duplicate()) + dataset = dataset.map(input_columns=["label_id"], operations=ops.Fill(0)) + else: + label_vocab = text.Vocab.from_list(label_list) + label_lookup = text.Lookup(label_vocab) + dataset = dataset.map(input_columns="label", output_columns="label_id", operations=label_lookup) + ### Processing sentence pairs + vocab = text.Vocab.from_file(bert_vocab_path) + tokenizer = text.BertTokenizer(vocab, lower_case=True) + lookup = text.Lookup(vocab, unknown_token='[UNK]') + ### Tokenizing sentences and truncate sequence pair + dataset = dataset.map(input_columns=["sentence1"], operations=tokenizer) + dataset = dataset.map(input_columns=["sentence2"], operations=tokenizer) + dataset = dataset.map(input_columns=["sentence1", "sentence2"], + operations=text.TruncateSequencePair(max_seq_len-3)) + ### Adding special tokens + dataset = dataset.map(input_columns=["sentence1"], + operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'), + append=np.array(["[SEP]"], dtype='S'))) + dataset = dataset.map(input_columns=["sentence2"], + operations=ops.Concatenate(append=np.array(["[SEP]"], dtype='S'))) + ### Generating segment_ids + dataset = dataset.map(input_columns=["sentence1"], output_columns=["sentence1", "type_sentence1"], + columns_order=["sentence1", "type_sentence1", "sentence2", "label_id"], + operations=ops.Duplicate()) + dataset = dataset.map(input_columns=["sentence2"], output_columns=["sentence2", "type_sentence2"], + columns_order=["sentence1", "type_sentence1", "sentence2", "type_sentence2", "label_id"], + operations=ops.Duplicate()) + dataset = dataset.map(input_columns=["type_sentence1"], operations=[lookup, ops.Fill(0)]) + dataset = dataset.map(input_columns=["type_sentence2"], operations=[lookup, ops.Fill(1)]) + dataset = dataset.map(input_columns=["type_sentence1", "type_sentence2"], output_columns=["segment_ids"], + columns_order=["sentence1", "sentence2", "segment_ids", "label_id"], + 
operations=ops.Concatenate()) + dataset = dataset.map(input_columns=["segment_ids"], operations=ops.PadEnd([max_seq_len], 0)) + ### Generating text_ids + dataset = dataset.map(input_columns=["sentence1", "sentence2"], output_columns=["text_ids"], + columns_order=["text_ids", "segment_ids", "label_id"], + operations=ops.Concatenate()) + dataset = dataset.map(input_columns=["text_ids"], operations=lookup) + dataset = dataset.map(input_columns=["text_ids"], operations=ops.PadEnd([max_seq_len], 0)) + ### Generating mask_ids + dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "mask_ids"], + columns_order=["label_id", "text_ids", "mask_ids", "segment_ids"], operations=ops.Duplicate()) + dataset = dataset.map(input_columns=["mask_ids"], operations=ops.Mask(ops.Relational.NE, 0, mstype.int32)) + dataset = dataset.batch(batch_size) + label = [] + text_ids = [] + mask_ids = [] + segment_ids = [] + for data in dataset: + label.append(data[0]) + text_ids.append(data[1]) + mask_ids.append(data[2]) + segment_ids.append(data[3]) + return label, text_ids, mask_ids, segment_ids diff --git a/model_zoo/bert/src/cluener_evaluation.py b/model_zoo/bert/src/cluener_evaluation.py index 09de6bf0b34..f4c747ac38c 100644 --- a/model_zoo/bert/src/cluener_evaluation.py +++ b/model_zoo/bert/src/cluener_evaluation.py @@ -19,15 +19,13 @@ import json import numpy as np import mindspore.common.dtype as mstype from mindspore.common.tensor import Tensor -from . 
import tokenization -from .sample_process import label_generation, process_one_example_p -from .evaluation_config import cfg -from .CRF import postprocess +from src import tokenization +from src.sample_process import label_generation, process_one_example_p +from src.CRF import postprocess +from src.finetune_eval_config import bert_net_cfg -vocab_file = "./vocab.txt" -tokenizer_ = tokenization.FullTokenizer(vocab_file=vocab_file) -def process(model, text, sequence_length): +def process(model=None, text="", tokenizer_=None, use_crf="", label2id_file=""): """ process text. """ @@ -36,13 +34,13 @@ def process(model, text, sequence_length): res = [] ids = [] for i in data: - feature = process_one_example_p(tokenizer_, i, max_seq_len=sequence_length) + feature = process_one_example_p(tokenizer_, i, max_seq_len=bert_net_cfg.seq_length) features.append(feature) input_ids, input_mask, token_type_id = feature input_ids = Tensor(np.array(input_ids), mstype.int32) input_mask = Tensor(np.array(input_mask), mstype.int32) token_type_id = Tensor(np.array(token_type_id), mstype.int32) - if cfg.use_crf: + if use_crf.lower() == "true": backpointers, best_tag_id = model.predict(input_ids, input_mask, token_type_id, Tensor(1)) best_path = postprocess(backpointers, best_tag_id) logits = [] @@ -54,19 +52,21 @@ def process(model, text, sequence_length): ids = logits.asnumpy() ids = np.argmax(ids, axis=-1) ids = list(ids) - res = label_generation(text, ids) + res = label_generation(text=text, probs=ids, label2id_file=label2id_file) return res -def submit(model, path, sequence_length): +def submit(model=None, path="", vocab_file="", use_crf="", label2id_file=""): """ submit task """ + tokenizer_ = tokenization.FullTokenizer(vocab_file=vocab_file) data = [] for line in open(path): if not line.strip(): continue oneline = json.loads(line.strip()) - res = process(model, oneline["text"], sequence_length) + res = process(model=model, text=oneline["text"], tokenizer_=tokenizer_, + use_crf=use_crf, 
label2id_file=label2id_file) print("text", oneline["text"]) print("res:", res) data.append(json.dumps({"label": res}, ensure_ascii=False)) diff --git a/model_zoo/bert/src/dataset.py b/model_zoo/bert/src/dataset.py index 7985ca8559f..e530718d4f1 100644 --- a/model_zoo/bert/src/dataset.py +++ b/model_zoo/bert/src/dataset.py @@ -36,8 +36,8 @@ def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", e ds = de.TFRecordDataset(data_files, schema_dir if schema_dir != "" else None, columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels", "masked_lm_positions", "masked_lm_ids", "masked_lm_weights"], - shuffle=(do_shuffle == "true"), num_shards=device_num, shard_id=rank, - shard_equal_rows=True) + shuffle=de.Shuffle.FILES if do_shuffle == "true" else False, + num_shards=device_num, shard_id=rank, shard_equal_rows=True) ori_dataset_size = ds.get_dataset_size() print('origin dataset size: ', ori_dataset_size) new_size = ori_dataset_size @@ -58,3 +58,77 @@ def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", e logger.info("data size: {}".format(ds.get_dataset_size())) logger.info("repeatcount: {}".format(ds.get_repeat_count())) return ds, new_repeat_count + + +def create_ner_dataset(batch_size=1, repeat_count=1, assessment_method="accuracy", + data_file_path=None, schema_file_path=None): + """create finetune or evaluation dataset""" + type_cast_op = C.TypeCast(mstype.int32) + ds = de.TFRecordDataset([data_file_path], schema_file_path if schema_file_path != "" else None, + columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"]) + if assessment_method == "Spearman_correlation": + type_cast_op_float = C.TypeCast(mstype.float32) + ds = ds.map(input_columns="label_ids", operations=type_cast_op_float) + else: + ds = ds.map(input_columns="label_ids", operations=type_cast_op) + ds = ds.map(input_columns="segment_ids", operations=type_cast_op) + ds = ds.map(input_columns="input_mask", 
operations=type_cast_op) + ds = ds.map(input_columns="input_ids", operations=type_cast_op) + ds = ds.repeat(repeat_count) + # apply shuffle operation + buffer_size = 960 + ds = ds.shuffle(buffer_size=buffer_size) + # apply batch operations + ds = ds.batch(batch_size, drop_remainder=True) + return ds + + +def create_classification_dataset(batch_size=1, repeat_count=1, assessment_method="accuracy", + data_file_path=None, schema_file_path=None): + """create finetune or evaluation dataset""" + type_cast_op = C.TypeCast(mstype.int32) + ds = de.TFRecordDataset([data_file_path], schema_file_path if schema_file_path != "" else None, + columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"]) + if assessment_method == "Spearman_correlation": + type_cast_op_float = C.TypeCast(mstype.float32) + ds = ds.map(input_columns="label_ids", operations=type_cast_op_float) + else: + ds = ds.map(input_columns="label_ids", operations=type_cast_op) + ds = ds.map(input_columns="segment_ids", operations=type_cast_op) + ds = ds.map(input_columns="input_mask", operations=type_cast_op) + ds = ds.map(input_columns="input_ids", operations=type_cast_op) + ds = ds.repeat(repeat_count) + # apply shuffle operation + buffer_size = 960 + ds = ds.shuffle(buffer_size=buffer_size) + # apply batch operations + ds = ds.batch(batch_size, drop_remainder=True) + return ds + + +def create_squad_dataset(batch_size=1, repeat_count=1, data_file_path=None, schema_file_path=None, is_training=True): + """create finetune or evaluation dataset""" + type_cast_op = C.TypeCast(mstype.int32) + if is_training: + ds = de.TFRecordDataset([data_file_path], schema_file_path if schema_file_path != "" else None, + columns_list=["input_ids", "input_mask", "segment_ids", + "start_positions", "end_positions", + "unique_ids", "is_impossible"]) + ds = ds.map(input_columns="start_positions", operations=type_cast_op) + ds = ds.map(input_columns="end_positions", operations=type_cast_op) + else: + ds = 
de.TFRecordDataset([data_file_path], schema_file_path if schema_file_path != "" else None, + columns_list=["input_ids", "input_mask", "segment_ids", "unique_ids"]) + ds = ds.map(input_columns="input_ids", operations=type_cast_op) + ds = ds.map(input_columns="input_mask", operations=type_cast_op) + ds = ds.map(input_columns="segment_ids", operations=type_cast_op) + ds = ds.map(input_columns="segment_ids", operations=type_cast_op) + ds = ds.map(input_columns="input_mask", operations=type_cast_op) + ds = ds.map(input_columns="input_ids", operations=type_cast_op) + ds = ds.repeat(repeat_count) + # apply shuffle operation + buffer_size = 960 + ds = ds.shuffle(buffer_size=buffer_size) + # apply batch operations + ds = ds.batch(batch_size, drop_remainder=True) + return ds diff --git a/model_zoo/bert/src/finetune_config.py b/model_zoo/bert/src/finetune_config.py deleted file mode 100644 index 6241d06994a..00000000000 --- a/model_zoo/bert/src/finetune_config.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -""" -config settings, will be used in finetune.py -""" - -from easydict import EasyDict as edict -import mindspore.common.dtype as mstype -from .bert_model import BertConfig - -cfg = edict({ - 'task': 'NER', - 'num_labels': 41, - 'data_file': '/your/path/train.tfrecord', - 'schema_file': '/your/path/schema.json', - 'epoch_num': 5, - 'ckpt_prefix': 'bert', - 'ckpt_dir': None, - 'pre_training_ckpt': '/your/path/pre_training.ckpt', - 'use_crf': False, - 'optimizer': 'Lamb', - 'AdamWeightDecayDynamicLR': edict({ - 'learning_rate': 2e-5, - 'end_learning_rate': 1e-7, - 'power': 1.0, - 'weight_decay': 1e-5, - 'eps': 1e-6, - }), - 'Lamb': edict({ - 'start_learning_rate': 2e-5, - 'end_learning_rate': 1e-7, - 'power': 1.0, - 'weight_decay': 0.01, - 'decay_filter': lambda x: False, - }), - 'Momentum': edict({ - 'learning_rate': 2e-5, - 'momentum': 0.9, - }), -}) - -bert_net_cfg = BertConfig( - batch_size=16, - seq_length=128, - vocab_size=21128, - hidden_size=768, - num_hidden_layers=12, - num_attention_heads=12, - intermediate_size=3072, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=2, - initializer_range=0.02, - use_relative_positions=False, - input_mask_from_dataset=True, - token_type_ids_from_dataset=True, - dtype=mstype.float32, - compute_type=mstype.float16, -) - -tag_to_index = { - "O": 0, - "S_address": 1, - "B_address": 2, - "M_address": 3, - "E_address": 4, - "S_book": 5, - "B_book": 6, - "M_book": 7, - "E_book": 8, - "S_company": 9, - "B_company": 10, - "M_company": 11, - "E_company": 12, - "S_game": 13, - "B_game": 14, - "M_game": 15, - "E_game": 16, - "S_government": 17, - "B_government": 18, - "M_government": 19, - "E_government": 20, - "S_movie": 21, - "B_movie": 22, - "M_movie": 23, - "E_movie": 24, - "S_name": 25, - "B_name": 26, - "M_name": 27, - "E_name": 28, - "S_organization": 29, - 
"B_organization": 30, - "M_organization": 31, - "E_organization": 32, - "S_position": 33, - "B_position": 34, - "M_position": 35, - "E_position": 36, - "S_scene": 37, - "B_scene": 38, - "M_scene": 39, - "E_scene": 40, - "": 41, - "": 42 -} diff --git a/model_zoo/bert/src/evaluation_config.py b/model_zoo/bert/src/finetune_eval_config.py similarity index 68% rename from model_zoo/bert/src/evaluation_config.py rename to model_zoo/bert/src/finetune_eval_config.py index b18c5643b00..4b8e121e095 100644 --- a/model_zoo/bert/src/evaluation_config.py +++ b/model_zoo/bert/src/finetune_eval_config.py @@ -21,18 +21,30 @@ from easydict import EasyDict as edict import mindspore.common.dtype as mstype from .bert_model import BertConfig -cfg = edict({ - 'task': 'NER', - 'num_labels': 41, - 'data_file': '/your/path/evaluation.tfrecord', - 'schema_file': '/your/path/schema.json', - 'finetune_ckpt': '/your/path/your.ckpt', - 'use_crf': False, - 'clue_benchmark': False, +optimizer_cfg = edict({ + 'optimizer': 'Lamb', + 'AdamWeightDecayDynamicLR': edict({ + 'learning_rate': 2e-5, + 'end_learning_rate': 1e-7, + 'power': 1.0, + 'weight_decay': 1e-5, + 'eps': 1e-6, + }), + 'Lamb': edict({ + 'start_learning_rate': 2e-5, + 'end_learning_rate': 1e-7, + 'power': 1.0, + 'weight_decay': 0.01, + 'decay_filter': lambda x: False, + }), + 'Momentum': edict({ + 'learning_rate': 2e-5, + 'momentum': 0.9, + }), }) bert_net_cfg = BertConfig( - batch_size=16 if not cfg.clue_benchmark else 1, + batch_size=16, seq_length=128, vocab_size=21128, hidden_size=768, @@ -40,8 +52,8 @@ bert_net_cfg = BertConfig( num_attention_heads=12, intermediate_size=3072, hidden_act="gelu", - hidden_dropout_prob=0.0, - attention_probs_dropout_prob=0.0, + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=2, initializer_range=0.02, diff --git a/model_zoo/bert/src/finetune_eval_model.py b/model_zoo/bert/src/finetune_eval_model.py new file mode 100644 index 
00000000000..047decc377a
--- /dev/null
+++ b/model_zoo/bert/src/finetune_eval_model.py
@@ -0,0 +1,123 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+'''
+Bert finetune and evaluation model script.
+'''
+
+import mindspore.nn as nn
+from mindspore.common.initializer import TruncatedNormal
+from mindspore.ops import operations as P
+from .bert_model import BertModel
+
+class BertCLSModel(nn.Cell):
+    """
+    This class is responsible for classification task evaluation, i.e. XNLI(num_labels=3),
+    LCQMC(num_labels=2), Chnsenti(num_labels=2). The returned output represents the final
+    logits as the results of log_softmax are proportional to those of softmax.
+ """ + def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False, + assessment_method=""): + super(BertCLSModel, self).__init__() + if not is_training: + config.hidden_dropout_prob = 0.0 + config.hidden_probs_dropout_prob = 0.0 + self.bert = BertModel(config, is_training, use_one_hot_embeddings) + self.cast = P.Cast() + self.weight_init = TruncatedNormal(config.initializer_range) + self.log_softmax = P.LogSoftmax(axis=-1) + self.dtype = config.dtype + self.num_labels = num_labels + self.dense_1 = nn.Dense(config.hidden_size, self.num_labels, weight_init=self.weight_init, + has_bias=True).to_float(config.compute_type) + self.dropout = nn.Dropout(1 - dropout_prob) + self.assessment_method = assessment_method + + def construct(self, input_ids, input_mask, token_type_id): + _, pooled_output, _ = \ + self.bert(input_ids, token_type_id, input_mask) + cls = self.cast(pooled_output, self.dtype) + cls = self.dropout(cls) + logits = self.dense_1(cls) + logits = self.cast(logits, self.dtype) + if self.assessment_method != "spearman_correlation": + logits = self.log_softmax(logits) + return logits + +class BertSquadModel(nn.Cell): + ''' + This class is responsible for SQuAD + ''' + def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False): + super(BertSquadModel, self).__init__() + if not is_training: + config.hidden_dropout_prob = 0.0 + config.hidden_probs_dropout_prob = 0.0 + self.bert = BertModel(config, is_training, use_one_hot_embeddings) + self.weight_init = TruncatedNormal(config.initializer_range) + self.dense1 = nn.Dense(config.hidden_size, num_labels, weight_init=self.weight_init, + has_bias=True).to_float(config.compute_type) + self.num_labels = num_labels + self.dtype = config.dtype + self.log_softmax = P.LogSoftmax(axis=1) + self.is_training = is_training + + def construct(self, input_ids, input_mask, token_type_id): + sequence_output, _, _ = self.bert(input_ids, token_type_id, 
input_mask)
+        batch_size, seq_length, hidden_size = P.Shape()(sequence_output)
+        sequence = P.Reshape()(sequence_output, (-1, hidden_size))
+        logits = self.dense1(sequence)
+        logits = P.Cast()(logits, self.dtype)
+        logits = P.Reshape()(logits, (batch_size, seq_length, self.num_labels))
+        logits = self.log_softmax(logits)
+        return logits
+
+class BertNERModel(nn.Cell):
+    """
+    This class is responsible for sequence labeling task evaluation, i.e. NER(num_labels=11).
+    The returned output represents the final logits as the results of log_softmax are proportional to those of softmax.
+    """
+    def __init__(self, config, is_training, num_labels=11, use_crf=False, dropout_prob=0.0,
+                 use_one_hot_embeddings=False):
+        super(BertNERModel, self).__init__()
+        if not is_training:
+            config.hidden_dropout_prob = 0.0
+            config.hidden_probs_dropout_prob = 0.0
+        self.bert = BertModel(config, is_training, use_one_hot_embeddings)
+        self.cast = P.Cast()
+        self.weight_init = TruncatedNormal(config.initializer_range)
+        self.log_softmax = P.LogSoftmax(axis=-1)
+        self.dtype = config.dtype
+        self.num_labels = num_labels
+        self.dense_1 = nn.Dense(config.hidden_size, self.num_labels, weight_init=self.weight_init,
+                                has_bias=True).to_float(config.compute_type)
+        self.dropout = nn.Dropout(1 - dropout_prob)
+        self.reshape = P.Reshape()
+        self.shape = (-1, config.hidden_size)
+        self.use_crf = use_crf
+        self.origin_shape = (config.batch_size, config.seq_length, self.num_labels)
+
+    def construct(self, input_ids, input_mask, token_type_id):
+        sequence_output, _, _ = \
+            self.bert(input_ids, token_type_id, input_mask)
+        seq = self.dropout(sequence_output)
+        seq = self.reshape(seq, self.shape)
+        logits = self.dense_1(seq)
+        logits = self.cast(logits, self.dtype)
+        if self.use_crf:
+            return_value = self.reshape(logits, self.origin_shape)
+        else:
+            return_value = self.log_softmax(logits)
+        return return_value
diff --git a/model_zoo/bert/src/sample_process.py b/model_zoo/bert/src/sample_process.py
index 
59f3e76a31a..c7cf29c510e 100644 --- a/model_zoo/bert/src/sample_process.py +++ b/model_zoo/bert/src/sample_process.py @@ -52,12 +52,12 @@ def process_one_example_p(tokenizer, text, max_seq_len=128): feature = (input_ids, input_mask, segment_ids) return feature -def label_generation(text, probs): +def label_generation(text="", probs=None, label2id_file=""): """generate label""" data = [text] probs = [probs] result = [] - label2id = json.loads(open("./label2id.json").read()) + label2id = json.loads(open(label2id_file).read()) id2label = [k for k, v in label2id.items()] for index, prob in enumerate(probs): diff --git a/model_zoo/bert/src/utils.py b/model_zoo/bert/src/utils.py index ec5651b2053..dfb6ffa5fef 100644 --- a/model_zoo/bert/src/utils.py +++ b/model_zoo/bert/src/utils.py @@ -17,347 +17,13 @@ Functional Cells used in Bert finetune and evaluation. """ +import os import mindspore.nn as nn -from mindspore.common.initializer import TruncatedNormal from mindspore.ops import operations as P -from mindspore.ops import functional as F -from mindspore.ops import composite as C from mindspore.common.tensor import Tensor -from mindspore.common.parameter import Parameter, ParameterTuple from mindspore.common import dtype as mstype -from mindspore.nn.wrap.grad_reducer import DistributedGradReducer -from mindspore.train.parallel_utils import ParallelMode -from mindspore.communication.management import get_group_size -from mindspore import context -from .bert_model import BertModel -from .bert_for_pre_training import clip_grad -from .CRF import CRF +from mindspore.train.callback import Callback -GRADIENT_CLIP_TYPE = 1 -GRADIENT_CLIP_VALUE = 1.0 -grad_scale = C.MultitypeFuncGraph("grad_scale") -reciprocal = P.Reciprocal() - -@grad_scale.register("Tensor", "Tensor") -def tensor_grad_scale(scale, grad): - return grad * reciprocal(scale) - -_grad_overflow = C.MultitypeFuncGraph("_grad_overflow") -grad_overflow = P.FloatStatus() - -@_grad_overflow.register("Tensor") -def 
_tensor_grad_overflow(grad): - return grad_overflow(grad) - -class BertFinetuneCell(nn.Cell): - """ - Especifically defined for finetuning where only four inputs tensor are needed. - """ - def __init__(self, network, optimizer, scale_update_cell=None): - - super(BertFinetuneCell, self).__init__(auto_prefix=False) - self.network = network - self.weights = ParameterTuple(network.trainable_params()) - self.optimizer = optimizer - self.grad = C.GradOperation('grad', - get_by_list=True, - sens_param=True) - self.reducer_flag = False - self.allreduce = P.AllReduce() - self.parallel_mode = context.get_auto_parallel_context("parallel_mode") - if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]: - self.reducer_flag = True - self.grad_reducer = None - if self.reducer_flag: - mean = context.get_auto_parallel_context("mirror_mean") - degree = get_group_size() - self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree) - self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE) - self.cast = P.Cast() - self.gpu_target = False - if context.get_context("device_target") == "GPU": - self.gpu_target = True - self.float_status = P.FloatStatus() - self.addn = P.AddN() - self.reshape = P.Reshape() - else: - self.alloc_status = P.NPUAllocFloatStatus() - self.get_status = P.NPUGetFloatStatus() - self.clear_before_grad = P.NPUClearFloatStatus() - self.reduce_sum = P.ReduceSum(keep_dims=False) - self.depend_parameter_use = P.ControlDepend(depend_mode=1) - self.base = Tensor(1, mstype.float32) - self.less_equal = P.LessEqual() - self.hyper_map = C.HyperMap() - self.loss_scale = None - self.loss_scaling_manager = scale_update_cell - if scale_update_cell: - self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32), - name="loss_scale") - - def construct(self, - input_ids, - input_mask, - token_type_id, - label_ids, - sens=None): - - - weights = self.weights - init = False - loss = 
self.network(input_ids, - input_mask, - token_type_id, - label_ids) - if sens is None: - scaling_sens = self.loss_scale - else: - scaling_sens = sens - - if not self.gpu_target: - init = self.alloc_status() - clear_before_grad = self.clear_before_grad(init) - F.control_depend(loss, init) - self.depend_parameter_use(clear_before_grad, scaling_sens) - grads = self.grad(self.network, weights)(input_ids, - input_mask, - token_type_id, - label_ids, - self.cast(scaling_sens, - mstype.float32)) - grads = self.hyper_map(F.partial(grad_scale, scaling_sens), grads) - grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) - if self.reducer_flag: - grads = self.grad_reducer(grads) - if not self.gpu_target: - flag = self.get_status(init) - flag_sum = self.reduce_sum(init, (0,)) - F.control_depend(grads, flag) - F.control_depend(flag, flag_sum) - else: - flag_sum = self.hyper_map(F.partial(_grad_overflow), grads) - flag_sum = self.addn(flag_sum) - flag_sum = self.reshape(flag_sum, (())) - if self.is_distributed: - flag_reduce = self.allreduce(flag_sum) - cond = self.less_equal(self.base, flag_reduce) - else: - cond = self.less_equal(self.base, flag_sum) - overflow = cond - if sens is None: - overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond) - return F.depend(ret, succ) - -class BertSquadCell(nn.Cell): - """ - specifically defined for finetuning where only four inputs tensor are needed. 
- """ - def __init__(self, network, optimizer, scale_update_cell=None): - super(BertSquadCell, self).__init__(auto_prefix=False) - self.network = network - self.weights = ParameterTuple(network.trainable_params()) - self.optimizer = optimizer - self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True) - self.reducer_flag = False - self.allreduce = P.AllReduce() - self.parallel_mode = context.get_auto_parallel_context("parallel_mode") - if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]: - self.reducer_flag = True - self.grad_reducer = None - if self.reducer_flag: - mean = context.get_auto_parallel_context("mirror_mean") - degree = get_group_size() - self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree) - self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE) - self.cast = P.Cast() - self.alloc_status = P.NPUAllocFloatStatus() - self.get_status = P.NPUGetFloatStatus() - self.clear_before_grad = P.NPUClearFloatStatus() - self.reduce_sum = P.ReduceSum(keep_dims=False) - self.depend_parameter_use = P.ControlDepend(depend_mode=1) - self.base = Tensor(1, mstype.float32) - self.less_equal = P.LessEqual() - self.hyper_map = C.HyperMap() - self.loss_scale = None - self.loss_scaling_manager = scale_update_cell - if scale_update_cell: - self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32), - name="loss_scale") - def construct(self, - input_ids, - input_mask, - token_type_id, - start_position, - end_position, - unique_id, - is_impossible, - sens=None): - weights = self.weights - init = self.alloc_status() - loss = self.network(input_ids, - input_mask, - token_type_id, - start_position, - end_position, - unique_id, - is_impossible) - if sens is None: - scaling_sens = self.loss_scale - else: - scaling_sens = sens - grads = self.grad(self.network, weights)(input_ids, - input_mask, - token_type_id, - start_position, - end_position, - unique_id, - 
is_impossible, - self.cast(scaling_sens, - mstype.float32)) - clear_before_grad = self.clear_before_grad(init) - F.control_depend(loss, init) - self.depend_parameter_use(clear_before_grad, scaling_sens) - grads = self.hyper_map(F.partial(grad_scale, scaling_sens), grads) - grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) - if self.reducer_flag: - grads = self.grad_reducer(grads) - flag = self.get_status(init) - flag_sum = self.reduce_sum(init, (0,)) - if self.is_distributed: - flag_reduce = self.allreduce(flag_sum) - cond = self.less_equal(self.base, flag_reduce) - else: - cond = self.less_equal(self.base, flag_sum) - F.control_depend(grads, flag) - F.control_depend(flag, flag_sum) - overflow = cond - if sens is None: - overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond) - return F.depend(ret, succ) - - -class BertRegressionModel(nn.Cell): - """ - Bert finetune model for regression task - """ - def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False): - super(BertRegressionModel, self).__init__() - self.bert = BertModel(config, is_training, use_one_hot_embeddings) - self.cast = P.Cast() - self.weight_init = TruncatedNormal(config.initializer_range) - self.log_softmax = P.LogSoftmax(axis=-1) - self.dtype = config.dtype - self.num_labels = num_labels - self.dropout = nn.Dropout(1 - dropout_prob) - self.dense_1 = nn.Dense(config.hidden_size, 1, weight_init=self.weight_init, - has_bias=True).to_float(mstype.float16) - - def construct(self, input_ids, input_mask, token_type_id): - _, pooled_output, _ = self.bert(input_ids, token_type_id, input_mask) - cls = self.cast(pooled_output, self.dtype) - cls = self.dropout(cls) - logits = self.dense_1(cls) - logits = self.cast(logits, self.dtype) - return logits - - -class BertCLSModel(nn.Cell): - """ - This class is responsible for 
classification task evaluation, i.e. XNLI(num_labels=3), - LCQMC(num_labels=2), Chnsenti(num_labels=2). The returned output represents the final - logits as the results of log_softmax is propotional to that of softmax. - """ - def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False): - super(BertCLSModel, self).__init__() - self.bert = BertModel(config, is_training, use_one_hot_embeddings) - self.cast = P.Cast() - self.weight_init = TruncatedNormal(config.initializer_range) - self.log_softmax = P.LogSoftmax(axis=-1) - self.dtype = config.dtype - self.num_labels = num_labels - self.dense_1 = nn.Dense(config.hidden_size, self.num_labels, weight_init=self.weight_init, - has_bias=True).to_float(config.compute_type) - self.dropout = nn.Dropout(1 - dropout_prob) - - def construct(self, input_ids, input_mask, token_type_id): - _, pooled_output, _ = \ - self.bert(input_ids, token_type_id, input_mask) - cls = self.cast(pooled_output, self.dtype) - cls = self.dropout(cls) - logits = self.dense_1(cls) - logits = self.cast(logits, self.dtype) - log_probs = self.log_softmax(logits) - return log_probs - -class BertSquadModel(nn.Cell): - """ - Bert finetune model for SQuAD v1.1 task - """ - def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False): - super(BertSquadModel, self).__init__() - self.bert = BertModel(config, is_training, use_one_hot_embeddings) - self.weight_init = TruncatedNormal(config.initializer_range) - self.dense1 = nn.Dense(config.hidden_size, num_labels, weight_init=self.weight_init, - has_bias=True).to_float(config.compute_type) - self.num_labels = num_labels - self.dtype = config.dtype - self.log_softmax = P.LogSoftmax(axis=1) - self.is_training = is_training - - def construct(self, input_ids, input_mask, token_type_id): - sequence_output, _, _ = self.bert(input_ids, token_type_id, input_mask) - batch_size, seq_length, hidden_size = P.Shape()(sequence_output) - sequence 
= P.Reshape()(sequence_output, (-1, hidden_size)) - logits = self.dense1(sequence) - logits = P.Cast()(logits, self.dtype) - logits = P.Reshape()(logits, (batch_size, seq_length, self.num_labels)) - logits = self.log_softmax(logits) - return logits - -class BertNERModel(nn.Cell): - """ - This class is responsible for sequence labeling task evaluation, i.e. NER(num_labels=11). - The returned output represents the final logits as the results of log_softmax is propotional to that of softmax. - """ - def __init__(self, config, is_training, num_labels=11, use_crf=False, dropout_prob=0.0, - use_one_hot_embeddings=False): - super(BertNERModel, self).__init__() - self.bert = BertModel(config, is_training, use_one_hot_embeddings) - self.cast = P.Cast() - self.weight_init = TruncatedNormal(config.initializer_range) - self.log_softmax = P.LogSoftmax(axis=-1) - self.dtype = config.dtype - self.num_labels = num_labels - self.dense_1 = nn.Dense(config.hidden_size, self.num_labels, weight_init=self.weight_init, - has_bias=True).to_float(config.compute_type) - self.dropout = nn.Dropout(1 - dropout_prob) - self.reshape = P.Reshape() - self.shape = (-1, config.hidden_size) - self.use_crf = use_crf - self.origin_shape = (config.batch_size, config.seq_length, self.num_labels) - - def construct(self, input_ids, input_mask, token_type_id): - sequence_output, _, _ = \ - self.bert(input_ids, token_type_id, input_mask) - seq = self.dropout(sequence_output) - seq = self.reshape(seq, self.shape) - logits = self.dense_1(seq) - logits = self.cast(logits, self.dtype) - if self.use_crf: - return_value = self.reshape(logits, self.origin_shape) - else: - return_value = self.log_softmax(logits) - return return_value class CrossEntropyCalculation(nn.Cell): """ @@ -387,95 +53,73 @@ class CrossEntropyCalculation(nn.Cell): return_value = logits * 1.0 return return_value -class BertCLS(nn.Cell): - """ - Train interface for classification finetuning task. 
- """ - def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False): - super(BertCLS, self).__init__() - self.bert = BertCLSModel(config, is_training, num_labels, dropout_prob, use_one_hot_embeddings) - self.loss = CrossEntropyCalculation(is_training) - self.num_labels = num_labels - def construct(self, input_ids, input_mask, token_type_id, label_ids): - log_probs = self.bert(input_ids, input_mask, token_type_id) - loss = self.loss(log_probs, label_ids, self.num_labels) - return loss +def make_directory(path: str): + """Make directory.""" + if path is None or not isinstance(path, str) or path.strip() == "": + logger.error("The path(%r) is invalid type.", path) + raise TypeError("Input path is invaild type") -class BertNER(nn.Cell): - """ - Train interface for sequence labeling finetuning task. - """ - def __init__(self, config, is_training, num_labels=11, use_crf=False, tag_to_index=None, dropout_prob=0.0, - use_one_hot_embeddings=False): - super(BertNER, self).__init__() - self.bert = BertNERModel(config, is_training, num_labels, use_crf, dropout_prob, use_one_hot_embeddings) - if use_crf: - if not tag_to_index: - raise Exception("The dict for tag-index mapping should be provided for CRF.") - self.loss = CRF(tag_to_index, config.batch_size, config.seq_length, is_training) - else: - self.loss = CrossEntropyCalculation(is_training) - self.num_labels = num_labels - self.use_crf = use_crf - def construct(self, input_ids, input_mask, token_type_id, label_ids): - logits = self.bert(input_ids, input_mask, token_type_id) - if self.use_crf: - loss = self.loss(logits, label_ids) - else: - loss = self.loss(logits, label_ids, self.num_labels) - return loss + # convert the relative paths + path = os.path.realpath(path) + logger.debug("The abs path is %r", path) -class BertSquad(nn.Cell): - """ - Train interface for SQuAD finetuning task. 
- """ - def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False): - super(BertSquad, self).__init__() - self.bert = BertSquadModel(config, is_training, num_labels, dropout_prob, use_one_hot_embeddings) - self.loss = CrossEntropyCalculation(is_training) - self.num_labels = num_labels - self.seq_length = config.seq_length - self.is_training = is_training - self.total_num = Parameter(Tensor([0], mstype.float32), name='total_num') - self.start_num = Parameter(Tensor([0], mstype.float32), name='start_num') - self.end_num = Parameter(Tensor([0], mstype.float32), name='end_num') - self.sum = P.ReduceSum() - self.equal = P.Equal() - self.argmax = P.ArgMaxWithValue(axis=1) - self.squeeze = P.Squeeze(axis=-1) + # check the path is exist and write permissions? + if os.path.exists(path): + real_path = path + else: + # All exceptions need to be caught because create directory maybe have some limit(permissions) + logger.debug("The directory(%s) doesn't exist, will create it", path) + try: + os.makedirs(path, exist_ok=True) + real_path = path + except PermissionError as e: + logger.error("No write permission on the directory(%r), error = %r", path, e) + raise TypeError("No write permission on the directory.") + return real_path - def construct(self, input_ids, input_mask, token_type_id, start_position, end_position, unique_id, is_impossible): - logits = self.bert(input_ids, input_mask, token_type_id) - if self.is_training: - unstacked_logits_0 = self.squeeze(logits[:, :, 0:1]) - unstacked_logits_1 = self.squeeze(logits[:, :, 1:2]) - start_loss = self.loss(unstacked_logits_0, start_position, self.seq_length) - end_loss = self.loss(unstacked_logits_1, end_position, self.seq_length) - total_loss = (start_loss + end_loss) / 2.0 - else: - start_logits = self.squeeze(logits[:, :, 0:1]) - end_logits = self.squeeze(logits[:, :, 1:2]) - total_loss = (unique_id, start_logits, end_logits) - return total_loss - - -class BertReg(nn.Cell): +class 
LossCallBack(Callback): """ - Bert finetune model with loss for regression task + Monitor the loss in training. + If the loss in NAN or INF terminating training. + Note: + if per_print_times is 0 do not print loss. + Args: + per_print_times (int): Print loss every times. Default: 1. """ - def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False): - super(BertReg, self).__init__() - self.bert = BertRegressionModel(config, is_training, num_labels, dropout_prob, use_one_hot_embeddings) - self.loss = nn.MSELoss() - self.is_training = is_training - self.sigmoid = P.Sigmoid() - self.cast = P.Cast() - self.mul = P.Mul() - def construct(self, input_ids, input_mask, token_type_id, labels): - logits = self.bert(input_ids, input_mask, token_type_id) - if self.is_training: - loss = self.loss(logits, labels) - else: - loss = logits - return loss + def __init__(self, per_print_times=1): + super(LossCallBack, self).__init__() + if not isinstance(per_print_times, int) or per_print_times < 0: + raise ValueError("print_step must be int and >= 0") + self._per_print_times = per_print_times + def step_end(self, run_context): + cb_params = run_context.original_args() + print("epoch: {}, step: {}, outputs are {}".format(cb_params.cur_epoch_num, cb_params.cur_step_num, + str(cb_params.net_outputs))) + +def LoadNewestCkpt(load_finetune_checkpoint_dir, steps_per_epoch, epoch_num, prefix): + """ + Find the ckpt finetune generated and load it into eval network. 
+ """ + files = os.listdir(load_finetune_checkpoint_dir) + pre_len = len(prefix) + max_num = 0 + for filename in files: + name_ext = os.path.splitext(filename) + if name_ext[-1] != ".ckpt": + continue + #steps_per_epoch = ds.get_dataset_size() + if filename.find(prefix) == 0 and not filename[pre_len].isalpha(): + index = filename[pre_len:].find("-") + if index == 0 and max_num == 0: + load_finetune_checkpoint_path = os.path.join(load_finetune_checkpoint_dir, filename) + elif index not in (0, -1): + name_split = name_ext[-2].split('_') + if (steps_per_epoch != int(name_split[len(name_split)-1])) \ + or (epoch_num != int(filename[pre_len + index + 1:pre_len + index + 2])): + continue + num = filename[pre_len + 1:pre_len + index] + if int(num) > max_num: + max_num = int(num) + load_finetune_checkpoint_path = os.path.join(load_finetune_checkpoint_dir, filename) + return load_finetune_checkpoint_path diff --git a/model_zoo/faster_rcnn/eval.py b/model_zoo/faster_rcnn/eval.py index e0b4e2d0ea1..d8dd2ed79ad 100644 --- a/model_zoo/faster_rcnn/eval.py +++ b/model_zoo/faster_rcnn/eval.py @@ -40,7 +40,7 @@ parser.add_argument("--checkpoint_path", type=str, required=True, help="Checkpoi parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") args_opt = parser.parse_args() -context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True, device_id=args_opt.device_id) +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id) def FasterRcnn_eval(dataset_path, ckpt_path, ann_file): """FasterRcnn evaluation.""" diff --git a/model_zoo/faster_rcnn/src/FasterRcnn/fpn_neck.py b/model_zoo/faster_rcnn/src/FasterRcnn/fpn_neck.py index 05d6d1c9d11..bcf0536f5be 100644 --- a/model_zoo/faster_rcnn/src/FasterRcnn/fpn_neck.py +++ b/model_zoo/faster_rcnn/src/FasterRcnn/fpn_neck.py @@ -22,7 +22,7 @@ from mindspore.common.tensor import Tensor from mindspore.common import dtype as mstype from 
mindspore.common.initializer import initializer -context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True) +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") def bias_init_zeros(shape): """Bias init method.""" diff --git a/model_zoo/faster_rcnn/src/FasterRcnn/proposal_generator.py b/model_zoo/faster_rcnn/src/FasterRcnn/proposal_generator.py index 9428b209141..f9bcc47df4f 100644 --- a/model_zoo/faster_rcnn/src/FasterRcnn/proposal_generator.py +++ b/model_zoo/faster_rcnn/src/FasterRcnn/proposal_generator.py @@ -22,7 +22,7 @@ from mindspore import Tensor from mindspore import context -context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True) +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") class Proposal(nn.Cell): diff --git a/model_zoo/faster_rcnn/src/FasterRcnn/resnet50.py b/model_zoo/faster_rcnn/src/FasterRcnn/resnet50.py index 20d9ee1f345..002ea08d0c5 100644 --- a/model_zoo/faster_rcnn/src/FasterRcnn/resnet50.py +++ b/model_zoo/faster_rcnn/src/FasterRcnn/resnet50.py @@ -22,7 +22,7 @@ from mindspore.ops import functional as F from mindspore import context -context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True) +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") def weight_init_ones(shape): diff --git a/model_zoo/faster_rcnn/train.py b/model_zoo/faster_rcnn/train.py index 3cc86c7cc11..7d5f190bab3 100644 --- a/model_zoo/faster_rcnn/train.py +++ b/model_zoo/faster_rcnn/train.py @@ -52,7 +52,7 @@ parser.add_argument("--device_num", type=int, default=1, help="Use device nums, parser.add_argument("--rank_id", type=int, default=0, help="Rank id, default is 0.") args_opt = parser.parse_args() -context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True, device_id=args_opt.device_id) +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id) if __name__ == 
'__main__': if not args_opt.do_eval and args_opt.run_distribute: diff --git a/model_zoo/gat/README.md b/model_zoo/gat/README.md index 7c30e088517..0c46aebbaf4 100644 --- a/model_zoo/gat/README.md +++ b/model_zoo/gat/README.md @@ -72,9 +72,9 @@ sh run_process_data.sh [SRC_PATH] [DATASET_NAME] >> Launch ``` #Generate dataset in mindrecord format for cora -sh run_process_data.sh cora +./run_process_data.sh ./data cora #Generate dataset in mindrecord format for citeseer -sh run_process_data.sh citeseer +./run_process_data.sh ./data citeseer ``` # Features diff --git a/model_zoo/gat/train.py b/model_zoo/gat/train.py index af1808b995a..acfbb05b78a 100644 --- a/model_zoo/gat/train.py +++ b/model_zoo/gat/train.py @@ -96,6 +96,8 @@ def train(): if eval_acc >= val_acc_max and eval_loss < val_loss_min: val_acc_model = eval_acc val_loss_model = eval_loss + if os.path.exists("ckpts/gat.ckpt"): + os.remove("ckpts/gat.ckpt") _exec_save_checkpoint(train_net.network, "ckpts/gat.ckpt") val_acc_max = np.max((val_acc_max, eval_acc)) val_loss_min = np.min((val_loss_min, eval_loss)) diff --git a/model_zoo/googlenet/scripts/run_train.sh b/model_zoo/googlenet/scripts/run_train.sh index c21c2f04b68..e8c045c8b18 100644 --- a/model_zoo/googlenet/scripts/run_train.sh +++ b/model_zoo/googlenet/scripts/run_train.sh @@ -33,10 +33,12 @@ MINDSPORE_HCCL_CONFIG_PATH=$(realpath $1) export MINDSPORE_HCCL_CONFIG_PATH echo "MINDSPORE_HCCL_CONFIG_PATH=${MINDSPORE_HCCL_CONFIG_PATH}" +export SERVER_ID=0 +rank_start=$((DEVICE_NUM * SERVER_ID)) for((i=0; i<${DEVICE_NUM}; i++)) do export DEVICE_ID=$i - export RANK_ID=$i + export RANK_ID=$((rank_start + i)) rm -rf ./train_parallel$i mkdir ./train_parallel$i cp -r ./src ./train_parallel$i diff --git a/model_zoo/googlenet/src/dataset.py b/model_zoo/googlenet/src/dataset.py index a1cbc2cdabb..a3f74a06178 100644 --- a/model_zoo/googlenet/src/dataset.py +++ b/model_zoo/googlenet/src/dataset.py @@ -31,8 +31,7 @@ def create_dataset(data_home, repeat_num=1, 
training=True): if not training: data_dir = os.path.join(data_home, "cifar-10-verify-bin") - rank_size = int(os.environ.get("RANK_SIZE")) if os.environ.get("RANK_SIZE") else None - rank_id = int(os.environ.get("RANK_ID")) if os.environ.get("RANK_ID") else None + rank_size, rank_id = _get_rank_info() data_set = ds.Cifar10Dataset(data_dir, num_shards=rank_size, shard_id=rank_id) resize_height = cfg.image_height @@ -65,3 +64,19 @@ def create_dataset(data_home, repeat_num=1, training=True): data_set = data_set.batch(batch_size=cfg.batch_size, drop_remainder=True) return data_set + + +def _get_rank_info(): + """ + get rank size and rank id + """ + rank_size = int(os.environ.get("RANK_SIZE", 1)) + + if rank_size > 1: + from mindspore.communication.management import get_rank, get_group_size + rank_size = get_group_size() + rank_id = get_rank() + else: + rank_size = rank_id = None + + return rank_size, rank_id diff --git a/model_zoo/lenet_quant/src/loss_monitor.py b/model_zoo/lenet_quant/src/loss_monitor.py new file mode 100644 index 00000000000..59c222d23dd --- /dev/null +++ b/model_zoo/lenet_quant/src/loss_monitor.py @@ -0,0 +1,92 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""LossMonitor Callback class.""" + +import time +import numpy as np +from mindspore.common.tensor import Tensor +from mindspore.train.callback import Callback + + +class LossMonitor(Callback): + """ + Monitor the loss in training. + + If the loss is NAN or INF, it will terminate training. + + Note: + If per_print_times is 0 do not print loss. + + Args: + per_print_times (int): Print loss every times. Default: 1. + lr_init (numpy array): train learning rate. Default: None. + + Raises: + ValueError: If print_step is not int or less than zero. + + Examples: + >>> LossMonitor(100, lr_init=Tensor([0.05]*100).asnumpy()) + """ + + def __init__(self, per_print_times=1, lr_init=None): + super(LossMonitor, self).__init__() + if not isinstance(per_print_times, int) or per_print_times < 0: + raise ValueError("print_step must be int and >= 0.") + self._per_print_times = per_print_times + self.lr_init = lr_init + + def epoch_begin(self, run_context): + self.losses = [] + self.epoch_time = time.time() + + def epoch_end(self, run_context): + cb_params = run_context.original_args() + epoch_mseconds = (time.time() - self.epoch_time) * 1000 + per_step_mseconds = epoch_mseconds / cb_params.batch_num + print("Epoch time: {:5.3f}, per step time: {:5.3f}, " + "avg loss: {:5.3f}".format(epoch_mseconds, + per_step_mseconds, + np.mean(self.losses))) + print("*" * 60) + + def step_begin(self, run_context): + self.step_time = time.time() + + def step_end(self, run_context): + cb_params = run_context.original_args() + step_mseconds = (time.time() - self.step_time) * 1000 + step_loss = cb_params.net_outputs + + if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor): + step_loss = step_loss[0] + if isinstance(step_loss, Tensor): + step_loss = np.mean(step_loss.asnumpy()) + + self.losses.append(step_loss) + cur_step_in_epoch = int((cb_params.cur_step_num - 1) % cb_params.batch_num) + 1 + + if 
isinstance(step_loss, float) and (np.isnan(step_loss) or np.isinf(step_loss)): + raise ValueError("Epoch: [{:3d}/{:3d}], step: [{:5d}/{:5d}]. " + "Invalid loss, terminating training.".format( + cb_params.cur_epoch_num - 1, cb_params.epoch_num, + cur_step_in_epoch, cb_params.batch_num)) + + if self._per_print_times != 0 and cb_params.cur_step_num % self._per_print_times == 0: + print("Epoch: [{:3d}/{:3d}], step: [{:5d}/{:5d}], " + "loss: [{:5.4f}], avg loss: [{:5.4f}], time: [{:5.4f}ms]".format( + cb_params.cur_epoch_num, cb_params.epoch_num, + cur_step_in_epoch, int(cb_params.batch_num), + step_loss, np.mean(self.losses), + step_mseconds), flush=True) diff --git a/model_zoo/lenet_quant/train.py b/model_zoo/lenet_quant/train.py index 2cff465832c..03e9ff62bde 100644 --- a/model_zoo/lenet_quant/train.py +++ b/model_zoo/lenet_quant/train.py @@ -22,12 +22,13 @@ import os import argparse import mindspore.nn as nn from mindspore import context -from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig from mindspore.train import Model from mindspore.nn.metrics import Accuracy from src.dataset import create_dataset from src.config import mnist_cfg as cfg from src.lenet_fusion import LeNet5 as LeNet5Fusion +from src.loss_monitor import LossMonitor parser = argparse.ArgumentParser(description='MindSpore MNIST Example') parser.add_argument('--device_target', type=str, default="Ascend", diff --git a/model_zoo/lenet_quant/train_quant.py b/model_zoo/lenet_quant/train_quant.py index 6f27cec1e35..3a87ccc70d0 100644 --- a/model_zoo/lenet_quant/train_quant.py +++ b/model_zoo/lenet_quant/train_quant.py @@ -23,13 +23,14 @@ import argparse import mindspore.nn as nn from mindspore import context from mindspore.train.serialization import load_checkpoint, load_param_into_net -from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor +from mindspore.train.callback 
import ModelCheckpoint, CheckpointConfig from mindspore.train import Model from mindspore.nn.metrics import Accuracy from mindspore.train.quant import quant from src.dataset import create_dataset from src.config import mnist_cfg as cfg from src.lenet_fusion import LeNet5 as LeNet5Fusion +from src.loss_monitor import LossMonitor parser = argparse.ArgumentParser(description='MindSpore MNIST Example') parser.add_argument('--device_target', type=str, default="Ascend", diff --git a/model_zoo/mass/eval.py b/model_zoo/mass/eval.py index 4da63a73336..bb844e91027 100644 --- a/model_zoo/mass/eval.py +++ b/model_zoo/mass/eval.py @@ -15,15 +15,13 @@ """Evaluation api.""" import argparse import pickle -import numpy as np from mindspore.common import dtype as mstype from config import TransformerConfig -from src.transformer import infer -from src.utils import ngram_ppl +from src.transformer import infer, infer_ppl from src.utils import Dictionary -from src.utils import rouge +from src.utils import get_score parser = argparse.ArgumentParser(description='Evaluation MASS.') parser.add_argument("--config", type=str, required=True, @@ -32,6 +30,8 @@ parser.add_argument("--vocab", type=str, required=True, help="Vocabulary to use.") parser.add_argument("--output", type=str, required=True, help="Result file path.") +parser.add_argument("--metric", type=str, default='rouge', + help='Set eval method.') def get_config(config): @@ -45,31 +45,15 @@ if __name__ == '__main__': args, _ = parser.parse_known_args() vocab = Dictionary.load_from_persisted_dict(args.vocab) _config = get_config(args.config) - result = infer(_config) + + if args.metric == 'rouge': + result = infer(_config) + else: + result = infer_ppl(_config) + with open(args.output, "wb") as f: pickle.dump(result, f, 1) - ppl_score = 0. 
- preds = [] - tgts = [] - _count = 0 - for sample in result: - sentence_prob = np.array(sample['prediction_prob'], dtype=np.float32) - sentence_prob = sentence_prob[:, 1:] - _ppl = [] - for path in sentence_prob: - _ppl.append(ngram_ppl(path, log_softmax=True)) - ppl = np.min(_ppl) - preds.append(' '.join([vocab[t] for t in sample['prediction']])) - tgts.append(' '.join([vocab[t] for t in sample['target']])) - print(f" | source: {' '.join([vocab[t] for t in sample['source']])}") - print(f" | target: {tgts[-1]}") - print(f" | prediction: {preds[-1]}") - print(f" | ppl: {ppl}.") - if np.isinf(ppl): - continue - ppl_score += ppl - _count += 1 - - print(f" | PPL={ppl_score / _count}.") - rouge(preds, tgts) + # get score by given metric + score = get_score(result, vocab, metric=args.metric) + print(score) diff --git a/model_zoo/mass/scripts/run.sh b/model_zoo/mass/scripts/run.sh index 91bed510eaa..132e38dae2d 100644 --- a/model_zoo/mass/scripts/run.sh +++ b/model_zoo/mass/scripts/run.sh @@ -18,7 +18,7 @@ export DEVICE_ID=0 export RANK_ID=0 export RANK_SIZE=1 -options=`getopt -u -o ht:n:i:j:c:o:v: -l help,task:,device_num:,device_id:,hccl_json:,config:,output:,vocab: -- "$@"` +options=`getopt -u -o ht:n:i:j:c:o:v:m: -l help,task:,device_num:,device_id:,hccl_json:,config:,output:,vocab:,metric: -- "$@"` eval set -- "$options" echo $options @@ -35,6 +35,7 @@ echo_help() echo " -c --config set the configuration file" echo " -o --output set the output file of inference" echo " -v --vocab set the vocabulary" + echo " -m --metric set the metric" } set_hccl_json() @@ -43,8 +44,8 @@ set_hccl_json() do if [[ "$1" == "-j" || "$1" == "--hccl_json" ]] then - export MINDSPORE_HCCL_CONFIG_PATH=$2 #/data/wsc/hccl_2p_01.json - export RANK_TABLE_FILE=$2 #/data/wsc/hccl_2p_01.json + export MINDSPORE_HCCL_CONFIG_PATH=$2 + export RANK_TABLE_FILE=$2 break fi shift @@ -119,6 +120,11 @@ do vocab=$2 shift 2 ;; + -m|--metric) + echo "metric"; + metric=$2 + shift 2 + ;; --) shift break @@ -163,7 
+169,7 @@ do python train.py --config ${configurations##*/} >>log.log 2>&1 & elif [ "$task" == "infer" ] then - python eval.py --config ${configurations##*/} --output ${output} --vocab ${vocab##*/} >>log_infer.log 2>&1 & + python eval.py --config ${configurations##*/} --output ${output} --vocab ${vocab##*/} --metric ${metric} >>log_infer.log 2>&1 & fi cd ../ done diff --git a/model_zoo/mass/src/transformer/__init__.py b/model_zoo/mass/src/transformer/__init__.py index 7912e7f0dd3..36db26d360b 100644 --- a/model_zoo/mass/src/transformer/__init__.py +++ b/model_zoo/mass/src/transformer/__init__.py @@ -19,10 +19,11 @@ from .decoder import TransformerDecoder from .beam_search import BeamSearchDecoder from .transformer_for_train import TransformerTraining, LabelSmoothedCrossEntropyCriterion, \ TransformerNetworkWithLoss, TransformerTrainOneStepWithLossScaleCell -from .infer_mass import infer +from .infer_mass import infer, infer_ppl __all__ = [ "infer", + "infer_ppl", "TransformerTraining", "LabelSmoothedCrossEntropyCriterion", "TransformerTrainOneStepWithLossScaleCell", diff --git a/model_zoo/mass/src/transformer/embedding.py b/model_zoo/mass/src/transformer/embedding.py index bdce540416d..22887b0a3e0 100644 --- a/model_zoo/mass/src/transformer/embedding.py +++ b/model_zoo/mass/src/transformer/embedding.py @@ -41,7 +41,7 @@ class EmbeddingLookup(nn.Cell): self.vocab_size = vocab_size self.use_one_hot_embeddings = use_one_hot_embeddings - init_weight = np.random.normal(0, embed_dim ** -0.5, size=[vocab_size, embed_dim]) + init_weight = np.random.normal(0, embed_dim ** -0.5, size=[vocab_size, embed_dim]).astype(np.float32) # 0 is Padding index, thus init it as 0. 
init_weight[0, :] = 0 self.embedding_table = Parameter(Tensor(init_weight), diff --git a/model_zoo/mass/src/transformer/infer_mass.py b/model_zoo/mass/src/transformer/infer_mass.py index 54a0b4e54fd..b887e3a7b54 100644 --- a/model_zoo/mass/src/transformer/infer_mass.py +++ b/model_zoo/mass/src/transformer/infer_mass.py @@ -17,13 +17,16 @@ import time import mindspore.nn as nn import mindspore.common.dtype as mstype +from mindspore.ops import operations as P from mindspore.common.tensor import Tensor from mindspore.train.model import Model +from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore import context from src.dataset import load_dataset from .transformer_for_infer import TransformerInferModel +from .transformer_for_train import TransformerTraining from ..utils.load_weights import load_infer_weights context.set_context( @@ -156,3 +159,129 @@ def infer(config): shuffle=False) if config.test_dataset else None prediction = transformer_infer(config, eval_dataset) return prediction + + +class TransformerInferPPLCell(nn.Cell): + """ + Encapsulation class of transformer network infer for PPL. + + Args: + config(TransformerConfig): Config. + + Returns: + Tuple[Tensor, Tensor], predicted log prob and label lengths. 
+ """ + def __init__(self, config): + super(TransformerInferPPLCell, self).__init__() + self.transformer = TransformerTraining(config, is_training=False, use_one_hot_embeddings=False) + self.batch_size = config.batch_size + self.vocab_size = config.vocab_size + self.one_hot = P.OneHot() + self.on_value = Tensor(float(1), mstype.float32) + self.off_value = Tensor(float(0), mstype.float32) + self.reduce_sum = P.ReduceSum() + self.reshape = P.Reshape() + self.cast = P.Cast() + self.flat_shape = (config.batch_size * config.seq_length,) + self.batch_shape = (config.batch_size, config.seq_length) + self.last_idx = (-1,) + + def construct(self, + source_ids, + source_mask, + target_ids, + target_mask, + label_ids, + label_mask): + """Defines the computation performed.""" + + predicted_log_probs = self.transformer(source_ids, source_mask, target_ids, target_mask) + label_ids = self.reshape(label_ids, self.flat_shape) + label_mask = self.cast(label_mask, mstype.float32) + one_hot_labels = self.one_hot(label_ids, self.vocab_size, self.on_value, self.off_value) + + label_log_probs = self.reduce_sum(predicted_log_probs * one_hot_labels, self.last_idx) + label_log_probs = self.reshape(label_log_probs, self.batch_shape) + log_probs = label_log_probs * label_mask + lengths = self.reduce_sum(label_mask, self.last_idx) + + return log_probs, lengths + + +def transformer_infer_ppl(config, dataset): + """ + Run infer with Transformer for PPL. + + Args: + config (TransformerConfig): Config. + dataset (Dataset): Dataset. + + Returns: + List[Dict], prediction, each example has 4 keys, "source", + "target", "log_prob" and "length". 
+ """ + tfm_infer = TransformerInferPPLCell(config=config) + tfm_infer.init_parameters_data() + + parameter_dict = load_checkpoint(config.existed_ckpt) + load_param_into_net(tfm_infer, parameter_dict) + + model = Model(tfm_infer) + + log_probs = [] + lengths = [] + source_sentences = [] + target_sentences = [] + for batch in dataset.create_dict_iterator(): + source_sentences.append(batch["source_eos_ids"]) + target_sentences.append(batch["target_eos_ids"]) + + source_ids = Tensor(batch["source_eos_ids"], mstype.int32) + source_mask = Tensor(batch["source_eos_mask"], mstype.int32) + target_ids = Tensor(batch["target_sos_ids"], mstype.int32) + target_mask = Tensor(batch["target_sos_mask"], mstype.int32) + label_ids = Tensor(batch["target_eos_ids"], mstype.int32) + label_mask = Tensor(batch["target_eos_mask"], mstype.int32) + + start_time = time.time() + log_prob, length = model.predict(source_ids, source_mask, target_ids, target_mask, label_ids, label_mask) + print(f" | Batch size: {config.batch_size}, " + f"Time cost: {time.time() - start_time}.") + + log_probs.append(log_prob.asnumpy()) + lengths.append(length.asnumpy()) + + output = [] + for inputs, ref, log_prob, length in zip(source_sentences, + target_sentences, + log_probs, + lengths): + for i in range(config.batch_size): + example = { + "source": inputs[i].tolist(), + "target": ref[i].tolist(), + "log_prob": log_prob[i].tolist(), + "length": length[i] + } + output.append(example) + + return output + + +def infer_ppl(config): + """ + Transformer infer PPL api. + + Args: + config (TransformerConfig): Config. 
+ + Returns: + list, result with + """ + eval_dataset = load_dataset(data_files=config.test_dataset, + batch_size=config.batch_size, + epoch_count=1, + sink_mode=config.dataset_sink_mode, + shuffle=False) if config.test_dataset else None + prediction = transformer_infer_ppl(config, eval_dataset) + return prediction diff --git a/model_zoo/mass/src/utils/__init__.py b/model_zoo/mass/src/utils/__init__.py index f78be57b228..efb9f6f4b6a 100644 --- a/model_zoo/mass/src/utils/__init__.py +++ b/model_zoo/mass/src/utils/__init__.py @@ -20,6 +20,7 @@ from .loss_monitor import LossCallBack from .byte_pair_encoding import bpe_encode from .initializer import zero_weight, one_weight, normal_weight, weight_variable from .rouge_score import rouge +from .eval_score import get_score __all__ = [ "Dictionary", @@ -31,5 +32,6 @@ __all__ = [ "one_weight", "zero_weight", "normal_weight", - "weight_variable" + "weight_variable", + "get_score" ] diff --git a/model_zoo/mass/src/utils/eval_score.py b/model_zoo/mass/src/utils/eval_score.py new file mode 100644 index 00000000000..30ff0b22083 --- /dev/null +++ b/model_zoo/mass/src/utils/eval_score.py @@ -0,0 +1,92 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Get score by given metric.""" +from .ppl_score import ngram_ppl +from .rouge_score import rouge + + +def get_ppl_score(result): + """ + Calculate Perplexity(PPL) score. + + Args: + List[Dict], prediction, each example has 4 keys, "source", + "target", "log_prob" and "length". + + Returns: + Float, ppl score. + """ + log_probs = [] + total_length = 0 + + for sample in result: + log_prob = sample['log_prob'] + length = sample['length'] + log_probs.extend(log_prob) + total_length += length + + print(f" | log_prob:{log_prob}") + print(f" | length:{length}") + + ppl = ngram_ppl(log_probs, total_length, log_softmax=True) + print(f" | final PPL={ppl}.") + return ppl + + +def get_rouge_score(result, vocab): + """ + Calculate ROUGE score. + + Args: + List[Dict], prediction, each example has 4 keys, "source", + "target", "prediction" and "prediction_prob". + Dictionary, dict instance. + + retur: + Str, rouge score. + """ + + predictions = [] + targets = [] + for sample in result: + predictions.append(' '.join([vocab[t] for t in sample['prediction']])) + targets.append(' '.join([vocab[t] for t in sample['target']])) + print(f" | source: {' '.join([vocab[t] for t in sample['source']])}") + print(f" | target: {targets[-1]}") + + return rouge(predictions, targets) + + +def get_score(result, vocab=None, metric='rouge'): + """ + Get eval score. + + Args: + List[Dict], prediction. + Dictionary, dict instance. + Str, metric function, default is rouge. + + Return: + Str, Score. 
+ """ + score = None + if metric == 'rouge': + score = get_rouge_score(result, vocab) + elif metric == 'ppl': + score = get_ppl_score(result) + else: + print(f" |metric not in (rouge, ppl)") + + return score diff --git a/model_zoo/mass/src/utils/ppl_score.py b/model_zoo/mass/src/utils/ppl_score.py index 2e5d6e6642e..4a9139ced03 100644 --- a/model_zoo/mass/src/utils/ppl_score.py +++ b/model_zoo/mass/src/utils/ppl_score.py @@ -17,10 +17,7 @@ from typing import Union import numpy as np -NINF = -1.0 * 1e9 - - -def ngram_ppl(prob: Union[np.ndarray, list], log_softmax=False, index: float = np.e): +def ngram_ppl(prob: Union[np.ndarray, list], length: int, log_softmax=False, index: float = np.e): """ Calculate Perplexity(PPL) score under N-gram language model. @@ -39,7 +36,8 @@ def ngram_ppl(prob: Union[np.ndarray, list], log_softmax=False, index: float = n Returns: float, ppl score. """ - eps = 1e-8 + if not length: + return np.inf if not isinstance(prob, (np.ndarray, list)): raise TypeError("`prob` must be type of list or np.ndarray.") if not isinstance(prob, np.ndarray): @@ -47,18 +45,17 @@ def ngram_ppl(prob: Union[np.ndarray, list], log_softmax=False, index: float = n if prob.shape[0] == 0: raise ValueError("`prob` length must greater than 0.") - p = 1.0 - sen_len = 0 - for t in range(prob.shape[0]): - s = prob[t] - if s <= NINF: - break - if log_softmax: - s = np.power(index, s) - p *= (1 / (s + eps)) - sen_len += 1 + print(f'length:{length}, log_prob:{prob}') - if sen_len == 0: - return np.inf + if log_softmax: + prob = np.sum(prob) / length + ppl = 1. / np.power(index, prob) + print(f'avg log prob:{prob}') + else: + p = 1. + for i in range(prob.shape[0]): + p *= (1. 
/ prob[i]) + ppl = pow(p, 1 / length) - return pow(p, 1 / sen_len) + print(f'ppl val:{ppl}') + return ppl diff --git a/model_zoo/mobilenetv2/Readme.md b/model_zoo/mobilenetv2/Readme.md index 5b36a63fe49..1687d2cbdcf 100644 --- a/model_zoo/mobilenetv2/Readme.md +++ b/model_zoo/mobilenetv2/Readme.md @@ -60,14 +60,14 @@ Dataset used: [imagenet](http://www.image-net.org/) ### Usage -- Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH] +- Ascend: sh run_train.sh Ascend [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH] [CKPT_PATH] - GPU: sh run_trian.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] ### Launch ``` # training example - Ascend: sh run_train.sh Ascend 8 192.168.0.1 0,1,2,3,4,5,6,7 ~/imagenet/train/ mobilenet_199.ckpt + Ascend: sh run_train.sh Ascend 8 0,1,2,3,4,5,6,7 hccl_config.json ~/imagenet/train/ mobilenet_199.ckpt GPU: sh run_train.sh GPU 8 0,1,2,3,4,5,6,7 ~/imagenet/train/ ``` diff --git a/model_zoo/mobilenetv2/scripts/run_train.sh b/model_zoo/mobilenetv2/scripts/run_train.sh index f1d80aeac6d..a6e2a794774 100644 --- a/model_zoo/mobilenetv2/scripts/run_train.sh +++ b/model_zoo/mobilenetv2/scripts/run_train.sh @@ -22,14 +22,16 @@ run_ascend() exit 1 fi - if [ ! -d $5 ] + if [ ! -d $5 ] && [ ! 
-f $5 ] then - echo "error: DATASET_PATH=$5 is not a directory" + echo "error: DATASET_PATH=$5 is not a directory or file" exit 1 fi BASEPATH=$(cd "`dirname $0`" || exit; pwd) export PYTHONPATH=${BASEPATH}:$PYTHONPATH + export MINDSPORE_HCCL_CONFIG_PATH=$4 + export RANK_TABLE_FILE=$4 if [ -d "../train" ]; then rm -rf ../train @@ -38,8 +40,7 @@ run_ascend() cd ../train || exit python ${BASEPATH}/../src/launch.py \ --nproc_per_node=$2 \ - --visible_devices=$4 \ - --server_id=$3 \ + --visible_devices=$3 \ --training_script=${BASEPATH}/../train.py \ --dataset_path=$5 \ --pre_trained=$6 \ @@ -80,7 +81,7 @@ run_gpu() if [ $# -gt 6 ] || [ $# -lt 4 ] then echo "Usage:\n \ - Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]\n \ + Ascend: sh run_train.sh Ascend [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH] [CKPT_PATH]\n \ GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]\n \ " exit 1 diff --git a/model_zoo/mobilenetv2/src/launch.py b/model_zoo/mobilenetv2/src/launch.py index 48c81596645..f5c97b0bd70 100644 --- a/model_zoo/mobilenetv2/src/launch.py +++ b/model_zoo/mobilenetv2/src/launch.py @@ -15,7 +15,6 @@ """launch train script""" import os import sys -import json import subprocess import shutil from argparse import ArgumentParser @@ -42,8 +41,6 @@ def parse_args(): "each process can be bound to a single D.") parser.add_argument("--visible_devices", type=str, default="0,1,2,3,4,5,6,7", help="will use the visible devices sequentially") - parser.add_argument("--server_id", type=str, default="", - help="server ip") parser.add_argument("--training_script", type=str, help="The full path to the single D training " "program/script to be launched in parallel, " @@ -63,66 +60,6 @@ def main(): assert os.path.isfile(args.training_script) assert len(visible_devices) >= args.nproc_per_node 
print('visible_devices:{}'.format(visible_devices)) - if not args.server_id: - print('pleaser input server ip!!!') - exit(0) - print('server_id:{}'.format(args.server_id)) - - # construct hccn_table - hccn_configs = open('/etc/hccn.conf', 'r').readlines() - device_ips = {} - for hccn_item in hccn_configs: - hccn_item = hccn_item.strip() - if hccn_item.startswith('address_'): - device_id, device_ip = hccn_item.split('=') - device_id = device_id.split('_')[1] - device_ips[device_id] = device_ip - print('device_id:{}, device_ip:{}'.format(device_id, device_ip)) - hccn_table = {} - hccn_table['board_id'] = '0x0000' - hccn_table['chip_info'] = '910' - hccn_table['deploy_mode'] = 'lab' - hccn_table['group_count'] = '1' - hccn_table['group_list'] = [] - instance_list = [] - usable_dev = '' - for instance_id in range(args.nproc_per_node): - instance = {} - instance['devices'] = [] - device_id = visible_devices[instance_id] - device_ip = device_ips[device_id] - usable_dev += str(device_id) - instance['devices'].append({ - 'device_id': device_id, - 'device_ip': device_ip, - }) - instance['rank_id'] = str(instance_id) - instance['server_id'] = args.server_id - instance_list.append(instance) - hccn_table['group_list'].append({ - 'device_num': str(args.nproc_per_node), - 'server_num': '1', - 'group_name': '', - 'instance_count': str(args.nproc_per_node), - 'instance_list': instance_list, - }) - hccn_table['para_plane_nic_location'] = 'device' - hccn_table['para_plane_nic_name'] = [] - for instance_id in range(args.nproc_per_node): - eth_id = visible_devices[instance_id] - hccn_table['para_plane_nic_name'].append('eth{}'.format(eth_id)) - hccn_table['para_plane_nic_num'] = str(args.nproc_per_node) - hccn_table['status'] = 'completed' - - # save hccn_table to file - table_path = os.getcwd() - if not os.path.exists(table_path): - os.mkdir(table_path) - table_fn = os.path.join(table_path, - 'rank_table_{}p_{}_{}.json'.format(args.nproc_per_node, usable_dev, args.server_id)) - with 
open(table_fn, 'w') as table_fp: - json.dump(hccn_table, table_fp, indent=4) - sys.stdout.flush() # spawn the processes processes = [] @@ -137,9 +74,6 @@ def main(): device_dir = os.path.join(cur_path, 'device{}'.format(rank_id)) env['RANK_ID'] = str(rank_id) env['DEVICE_ID'] = str(device_id) - if args.nproc_per_node > 1: - env['MINDSPORE_HCCL_CONFIG_PATH'] = table_fn - env['RANK_TABLE_FILE'] = table_fn if os.path.exists(device_dir): shutil.rmtree(device_dir) os.mkdir(device_dir) diff --git a/model_zoo/mobilenetv2/train.py b/model_zoo/mobilenetv2/train.py index 2c211b375a4..4ae743f5405 100644 --- a/model_zoo/mobilenetv2/train.py +++ b/model_zoo/mobilenetv2/train.py @@ -18,6 +18,7 @@ import time import argparse import random import numpy as np + from mindspore import context from mindspore import Tensor from mindspore import nn @@ -32,8 +33,9 @@ from mindspore.train.model import Model, ParallelMode from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback from mindspore.train.loss_scale_manager import FixedLossScaleManager from mindspore.train.serialization import load_checkpoint, load_param_into_net -from mindspore.communication.management import init, get_group_size +from mindspore.communication.management import init, get_group_size, get_rank import mindspore.dataset.engine as de + from src.dataset import create_dataset from src.lr_generator import get_lr from src.config import config_gpu, config_ascend @@ -60,9 +62,14 @@ if args_opt.platform == "Ascend": device_id=device_id, save_graphs=False) elif args_opt.platform == "GPU": context.set_context(mode=context.GRAPH_MODE, - device_target="GPU", save_graphs=False) + device_target="GPU", + save_graphs=False) + init("nccl") + context.set_auto_parallel_context(device_num=get_group_size(), + parallel_mode=ParallelMode.DATA_PARALLEL, + mirror_mean=True) else: - raise ValueError("Unsupport platform.") + raise ValueError("Unsupported device target.") class CrossEntropyWithLabelSmooth(_Loss): @@ 
-155,12 +162,8 @@ class Monitor(Callback): if __name__ == '__main__': if args_opt.platform == "GPU": # train on gpu - print("train args: ", args_opt, "\ncfg: ", config_gpu) - - init('nccl') - context.set_auto_parallel_context(parallel_mode="data_parallel", - mirror_mean=True, - device_num=get_group_size()) + print("train args: ", args_opt) + print("cfg: ", config_gpu) # define net net = mobilenet_v2(num_classes=config_gpu.num_classes, platform="GPU") @@ -201,13 +204,13 @@ if __name__ == '__main__': loss_scale_manager=loss_scale) cb = [Monitor(lr_init=lr.asnumpy())] + ckpt_save_dir = config_gpu.save_checkpoint_path + "ckpt_" + str(get_rank()) + "/" if config_gpu.save_checkpoint: config_ck = CheckpointConfig(save_checkpoint_steps=config_gpu.save_checkpoint_epochs * step_size, keep_checkpoint_max=config_gpu.keep_checkpoint_max) - ckpt_cb = ModelCheckpoint( - prefix="mobilenetV2", directory=config_gpu.save_checkpoint_path, config=config_ck) + ckpt_cb = ModelCheckpoint(prefix="mobilenetV2", directory=ckpt_save_dir, config=config_ck) cb += [ckpt_cb] - # begine train + # begin train model.train(epoch_size, dataset, callbacks=cb) elif args_opt.platform == "Ascend": # train on ascend diff --git a/model_zoo/mobilenetv2_quant/export.py b/model_zoo/mobilenetv2_quant/export.py new file mode 100644 index 00000000000..00e377cece2 --- /dev/null +++ b/model_zoo/mobilenetv2_quant/export.py @@ -0,0 +1,54 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Export MobilenetV2 on ImageNet""" + +import argparse +import numpy as np + +import mindspore +from mindspore import Tensor +from mindspore import context +from mindspore.train.serialization import load_checkpoint, load_param_into_net +from mindspore.train.quant import quant + +from src.mobilenetV2 import mobilenetV2 +from src.config import config_ascend + +parser = argparse.ArgumentParser(description='Image classification') +parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path') +parser.add_argument('--device_target', type=str, default=None, help='Run device target') +args_opt = parser.parse_args() + +if __name__ == '__main__': + cfg = None + if args_opt.device_target == "Ascend": + cfg = config_ascend + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False) + else: + raise ValueError("Unsupported device target: {}.".format(args_opt.device_target)) + + # define fusion network + network = mobilenetV2(num_classes=cfg.num_classes) + # convert fusion network to quantization aware network + network = quant.convert_quant_network(network, bn_fold=True, per_channel=[True, False], symmetric=[True, False]) + # load checkpoint + param_dict = load_checkpoint(args_opt.checkpoint_path) + load_param_into_net(network, param_dict) + + # export network + print("============== Starting export ==============") + inputs = Tensor(np.ones([1, 3, cfg.image_height, cfg.image_width]), mindspore.float32) + quant.export(network, inputs, file_name="mobilenet_quant", file_format='GEIR') + print("============== End export ==============") diff --git a/model_zoo/mobilenetv3/train.py b/model_zoo/mobilenetv3/train.py index 578893ab75d..57199ec1a70 100644 --- a/model_zoo/mobilenetv3/train.py +++ b/model_zoo/mobilenetv3/train.py @@ -18,6 +18,7 
@@ import time import argparse import random import numpy as np + from mindspore import context from mindspore import Tensor from mindspore import nn @@ -33,7 +34,8 @@ from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback from mindspore.train.loss_scale_manager import FixedLossScaleManager from mindspore.train.serialization import load_checkpoint, load_param_into_net import mindspore.dataset.engine as de -from mindspore.communication.management import init, get_group_size +from mindspore.communication.management import init, get_group_size, get_rank + from src.dataset import create_dataset from src.lr_generator import get_lr from src.config import config_gpu, config_ascend @@ -57,10 +59,16 @@ if args_opt.platform == "Ascend": device_id = int(os.getenv('DEVICE_ID')) context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", - device_id=device_id, save_graphs=False) + device_id=device_id, + save_graphs=False) elif args_opt.platform == "GPU": context.set_context(mode=context.GRAPH_MODE, - device_target="GPU", save_graphs=False) + device_target="GPU", + save_graphs=False) + init("nccl") + context.set_auto_parallel_context(device_num=get_group_size(), + parallel_mode=ParallelMode.DATA_PARALLEL, + mirror_mean=True) else: raise ValueError("Unsupport platform.") @@ -155,12 +163,8 @@ class Monitor(Callback): if __name__ == '__main__': if args_opt.platform == "GPU": # train on gpu - print("train args: ", args_opt, "\ncfg: ", config_gpu) - - init('nccl') - context.set_auto_parallel_context(parallel_mode="data_parallel", - mirror_mean=True, - device_num=get_group_size()) + print("train args: ", args_opt) + print("cfg: ", config_gpu) # define net net = mobilenet_v3_large(num_classes=config_gpu.num_classes) @@ -201,11 +205,11 @@ if __name__ == '__main__': loss_scale_manager=loss_scale) cb = [Monitor(lr_init=lr.asnumpy())] + ckpt_save_dir = config_gpu.save_checkpoint_path + "ckpt_" + str(get_rank()) + "/" if config_gpu.save_checkpoint: 
config_ck = CheckpointConfig(save_checkpoint_steps=config_gpu.save_checkpoint_epochs * step_size, keep_checkpoint_max=config_gpu.keep_checkpoint_max) - ckpt_cb = ModelCheckpoint( - prefix="mobilenetV3", directory=config_gpu.save_checkpoint_path, config=config_ck) + ckpt_cb = ModelCheckpoint(prefix="mobilenetV3", directory=ckpt_save_dir, config=config_ck) cb += [ckpt_cb] # begine train model.train(epoch_size, dataset, callbacks=cb) diff --git a/model_zoo/utils/hccl_tools/README.md b/model_zoo/utils/hccl_tools/README.md new file mode 100644 index 00000000000..b73a99e592c --- /dev/null +++ b/model_zoo/utils/hccl_tools/README.md @@ -0,0 +1,14 @@ +# description + +mindspore distributed training launch helper utilty that will generate hccl config file. + +# use + +``` +python hccl_tools.py --device_num [1,8] +``` + +output: +``` +hccl_[device_num]p_[which device]_[server_ip].json +``` \ No newline at end of file diff --git a/model_zoo/utils/hccl_tools/hccl_tools.py b/model_zoo/utils/hccl_tools/hccl_tools.py new file mode 100644 index 00000000000..ac4114c0a8d --- /dev/null +++ b/model_zoo/utils/hccl_tools/hccl_tools.py @@ -0,0 +1,165 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""generate hccl config file script""" +import os +import sys +import json +import socket +import platform +from argparse import ArgumentParser +from typing import Dict, Any + + +def parse_args(): + """ + parse args . + + Args: + + Returns: + args. + + Examples: + >>> parse_args() + """ + parser = ArgumentParser(description="mindspore distributed training launch " + "helper utilty that will generate hccl" + " config file") + parser.add_argument("--device_num", type=str, default="[0,8]", + help="The number of the D chip used. please note that the D chips" + "used must be continuous, such [0,4] means to use four chips " + "0,1,2,3; [0,1] means to use chip 0; The first four chips are" + "a group, and the last four chips are a group. In addition to" + "the [0,8] chips are allowed, other cross-group such as [3,6]" + "are prohibited.") + parser.add_argument("--visible_devices", type=str, default="0,1,2,3,4,5,6,7", + help="will use the visible devices sequentially") + parser.add_argument("--server_ip", type=str, default="", + help="server ip") + args = parser.parse_args() + return args + + +def get_host_ip(): + """ + get host ip + """ + ip = None + + try: + hostname = socket.gethostname() + ip = socket.gethostbyname(hostname) + except EOFError: + pass + + return ip + + +def main(): + print("start", __file__) + args = parse_args() + + # visible_devices + visible_devices = args.visible_devices.split(',') + print('visible_devices:{}'.format(visible_devices)) + + # server_id + ip = get_host_ip() + if args.server_ip: + server_id = args.server_ip + elif ip: + server_id = ip + else: + raise ValueError("please input server ip!") + print('server_id:{}'.format(server_id)) + + # device_num + first_num = int(args.device_num[1]) + last_num = int(args.device_num[3]) + if first_num < 0 or last_num > 8: + raise ValueError("device num {} must be in range [0,8] !".format(args.device_num)) + if first_num > 
last_num: + raise ValueError("First num {} of device num {} must less than last num {} !".format(first_num, args.device_num, + last_num)) + if first_num < 4: + if last_num > 4: + if first_num == 0 and last_num == 8: + pass + else: + raise ValueError("device num {} must be in the same group of [0,4] or [4,8] !".format(args.device_num)) + + device_num_list = list(range(first_num, last_num)) + print("device_num_list:", device_num_list) + + assert len(visible_devices) >= len(device_num_list) + + # construct hccn_table + device_ips: Dict[Any, Any] = {} + with open('/etc/hccn.conf', 'r') as fin: + for hccn_item in fin.readlines(): + if hccn_item.strip().startswith('address_'): + device_id, device_ip = hccn_item.split('=') + device_id = device_id.split('_')[1] + device_ips[device_id] = device_ip.strip() + + arch = platform.processor() + hccn_table = {'board_id': {'aarch64': '0x002f', 'x86_64': '0x0000'}[arch], + 'chip_info': '910', + 'deploy_mode': 'lab', + 'group_count': '1', + 'group_list': []} + instance_list = [] + rank_id = 0 + for instance_id in device_num_list: + instance = {'devices': []} + device_id = visible_devices[instance_id] + device_ip = device_ips[device_id] + instance['devices'].append({ + 'device_id': device_id, + 'device_ip': device_ip, + }) + print('rank_id:{}, device_id:{}, device_ip:{}'.format(rank_id, device_id, device_ip)) + instance['rank_id'] = str(rank_id) + rank_id += 1 + instance['server_id'] = server_id + instance_list.append(instance) + hccn_table['group_list'].append({ + 'device_num': str(len(device_num_list)), + 'server_num': '1', + 'group_name': '', + 'instance_count': str(len(device_num_list)), + 'instance_list': instance_list, + }) + hccn_table['para_plane_nic_location'] = 'device' + hccn_table['para_plane_nic_name'] = [] + for instance_id in device_num_list: + eth_id = visible_devices[instance_id] + hccn_table['para_plane_nic_name'].append('eth{}'.format(eth_id)) + hccn_table['para_plane_nic_num'] = str(len(device_num_list)) + 
hccn_table['status'] = 'completed' + + # save hccn_table to file + table_path = os.getcwd() + table_fn = os.path.join(table_path, + 'hccl_{}p_{}_{}.json'.format(len(device_num_list), "".join(map(str, device_num_list)), + server_id)) + with open(table_fn, 'w') as table_fp: + json.dump(hccn_table, table_fp, indent=4) + sys.stdout.flush() + print("Completed: hccl file was save in :", table_fn) + + +if __name__ == "__main__": + main() diff --git a/model_zoo/wide_and_deep/src/wide_and_deep.py b/model_zoo/wide_and_deep/src/wide_and_deep.py index 16102039a88..048bf3c66d5 100644 --- a/model_zoo/wide_and_deep/src/wide_and_deep.py +++ b/model_zoo/wide_and_deep/src/wide_and_deep.py @@ -188,7 +188,7 @@ class WideDeepModel(nn.Cell): self.deep_layer_act, use_activation=False, convert_dtype=True, drop_out=config.dropout_flag) - self.gather_v2 = P.GatherV2() + self.embeddinglookup = nn.EmbeddingLookup(target='DEVICE') self.mul = P.Mul() self.reduce_sum = P.ReduceSum(keep_dims=False) self.reshape = P.Reshape() @@ -206,11 +206,11 @@ class WideDeepModel(nn.Cell): """ mask = self.reshape(wt_hldr, (self.batch_size, self.field_size, 1)) # Wide layer - wide_id_weight = self.gather_v2(self.wide_w, id_hldr, 0) + wide_id_weight = self.embeddinglookup(self.wide_w, id_hldr) wx = self.mul(wide_id_weight, mask) wide_out = self.reshape(self.reduce_sum(wx, 1) + self.wide_b, (-1, 1)) # Deep layer - deep_id_embs = self.gather_v2(self.embedding_table, id_hldr, 0) + deep_id_embs = self.embeddinglookup(self.embedding_table, id_hldr) vx = self.mul(deep_id_embs, mask) deep_in = self.reshape(vx, (-1, self.field_size * self.emb_dim)) deep_in = self.dense_layer_1(deep_in) diff --git a/scripts/build_icu4c.sh b/scripts/build_icu4c.sh new file mode 100755 index 00000000000..c7f21b756f6 --- /dev/null +++ b/scripts/build_icu4c.sh @@ -0,0 +1,8 @@ +#!/bin/bash +echo '{ + "strategy": "additive", + "featureFilters": { + "normalization": "include" + } +}' > filter.json +./icu4c/source/runConfigureICU Linux 
--enable-rpath --disable-tests --disable-samples --disable-icuio --disable-extras ICU_DATA_FILTER_FILE=filter.json "$@" diff --git a/serving/CMakeLists.txt b/serving/CMakeLists.txt index 3c1c08ece08..4529323fe14 100644 --- a/serving/CMakeLists.txt +++ b/serving/CMakeLists.txt @@ -13,7 +13,6 @@ add_library(protobuf::libprotobuf ALIAS protobuf::protobuf) add_executable(protobuf::libprotoc ALIAS protobuf::protoc) set(_PROTOBUF_LIBPROTOBUF protobuf::libprotobuf) -set(_REFLECTION gRPC::grpc++_reflection) if(CMAKE_CROSSCOMPILING) find_program(_PROTOBUF_PROTOC protoc) else() @@ -22,10 +21,19 @@ endif() # Find gRPC installation # Looks for gRPCConfig.cmake file installed by gRPC's cmake installation. +if (EXISTS ${grpc_ROOT}/lib64) + set(gRPC_DIR "${grpc_ROOT}/lib64/cmake/grpc") +else() + set(gRPC_DIR "${grpc_ROOT}/lib/cmake/grpc") +endif() +message("serving using grpc_DIR : " ${gRPC_DIR}) + find_package(gRPC CONFIG REQUIRED) message(STATUS "Using gRPC ${gRPC_VERSION}") set(_GRPC_GRPCPP gRPC::grpc++) +set(_REFLECTION gRPC::grpc++_reflection) + if(CMAKE_CROSSCOMPILING) find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin) else() diff --git a/setup.py b/setup.py index 2840eb3b14b..bf16c9106be 100644 --- a/setup.py +++ b/setup.py @@ -103,6 +103,7 @@ package_data = { 'lib/*.so*', 'lib/*.a', '.commit_id', + 'ms_serving' ] } @@ -125,6 +126,8 @@ def update_permissions(path): for filename in filenames: file_fullpath = os.path.join(dirpath, filename) os.chmod(file_fullpath, stat.S_IREAD) + if filename == "ms_serving": + os.chmod(file_fullpath, stat.S_IREAD | stat.S_IEXEC) class EggInfo(egg_info): diff --git a/tests/st/control/test_switch_layer.py b/tests/st/control/test_switch_layer.py new file mode 100644 index 00000000000..4accb44f1ab --- /dev/null +++ b/tests/st/control/test_switch_layer.py @@ -0,0 +1,56 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance 
with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import numpy as np +import pytest + +import mindspore.context as context +from mindspore import Tensor, nn +from mindspore.common import dtype as mstype + + +class CaseNet(nn.Cell): + def __init__(self): + super(CaseNet, self).__init__() + self.conv = nn.Conv2d(1, 3, 3) + self.relu = nn.ReLU() + self.softmax = nn.Softmax() + self.layers1 = (self.relu, self.softmax) + self.layers2 = (self.conv, self.relu) + + def construct(self, x, index1, index2): + x = self.layers1[index1](x) + x = self.layers2[index2](x) + return x + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_switch_layer(): + context.set_context(mode=context.GRAPH_MODE) + net = CaseNet() + data = Tensor(np.ones((1, 1, 224, 224)), mstype.float32) + idx = Tensor(0, mstype.int32) + idx2 = Tensor(-1, mstype.int32) + value = net(data, idx, idx2) + relu = nn.ReLU() + true_value = relu(data) + ret = np.allclose(value.asnumpy(), true_value.asnumpy()) + assert ret + + idx3 = Tensor(3, mstype.int32) + with pytest.raises(RuntimeError): + value = net(data, idx3, idx2) diff --git a/mindspore/model_zoo/resnet.py b/tests/st/networks/models/resnet50/src/resnet.py similarity index 100% rename from mindspore/model_zoo/resnet.py rename to tests/st/networks/models/resnet50/src/resnet.py diff --git a/tests/st/networks/models/resnet50/test_resnet50_imagenet.py b/tests/st/networks/models/resnet50/test_resnet50_imagenet.py index c88af6bcf7e..e721b62c589 100644 --- 
a/tests/st/networks/models/resnet50/test_resnet50_imagenet.py +++ b/tests/st/networks/models/resnet50/test_resnet50_imagenet.py @@ -27,10 +27,10 @@ from mindspore.parallel._auto_parallel_context import auto_parallel_context from mindspore.train.model import Model, ParallelMode from mindspore.train.callback import Callback from mindspore.train.loss_scale_manager import FixedLossScaleManager -from mindspore.model_zoo.resnet import resnet50 import mindspore.nn as nn import mindspore.dataset as ds +from tests.st.networks.models.resnet50.src.resnet import resnet50 from tests.st.networks.models.resnet50.src.dataset import create_dataset from tests.st.networks.models.resnet50.src.lr_generator import get_learning_rate from tests.st.networks.models.resnet50.src.config import config diff --git a/tests/st/ops/ascend/test_autocast.py b/tests/st/ops/ascend/test_autocast.py index 448dc9b4d60..35690ce2c40 100644 --- a/tests/st/ops/ascend/test_autocast.py +++ b/tests/st/ops/ascend/test_autocast.py @@ -246,3 +246,21 @@ def test_tensor_auto_cast(): bnet(t_fp32) with pytest.raises(TypeError): bnet(t_fp64) +def test_bool_tensor_and_float(): + context.set_context(mode=context.GRAPH_MODE) + t_bool = Tensor(np.ones([2, 1, 2, 2]).astype(np.bool), mstype.bool_) + t_int32 = Tensor(np.ones([2, 1, 2, 2]), mstype.int32) + t_fp16 = Tensor(np.ones([2, 1, 2, 2]), mstype.float16) + t_fp32 = Tensor(np.ones([2, 1, 2, 2]), mstype.float32) + net = TensorFPAutoCast() + out = net(t_bool) + assert out.dtype == mstype.float32 + net = TensorIntAutoCast() + out = net(t_bool) + assert out.dtype == mstype.int32 + out = net(t_fp16) + assert out.dtype == mstype.float16 + out = net(t_fp32) + assert out.dtype == mstype.float32 + out = net(t_int32) + assert out.dtype == mstype.int32 diff --git a/tests/st/ops/ascend/test_distribution/test_bernoulli.py b/tests/st/ops/ascend/test_distribution/test_bernoulli.py new file mode 100644 index 00000000000..5652d536c7b --- /dev/null +++ 
b/tests/st/ops/ascend/test_distribution/test_bernoulli.py @@ -0,0 +1,147 @@ +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""test cases for bernoulli distribution""" +import numpy as np +from scipy import stats +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.common.api import ms_function +from mindspore import dtype + +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + +class Net(nn.Cell): + """ + Test class: probability of bernoulli distribution. + """ + def __init__(self): + super(Net, self).__init__() + self.b = nn.Bernoulli(0.7, dtype=dtype.int32) + + @ms_function + def construct(self, x_): + return self.b('prob', x_) + +class Net1(nn.Cell): + """ + Test class: log probability of bernoulli distribution. + """ + def __init__(self): + super(Net1, self).__init__() + self.b = nn.Bernoulli(0.7, dtype=dtype.int32) + + @ms_function + def construct(self, x_): + return self.b('log_prob', x_) + +class Net2(nn.Cell): + """ + Test class: kl_loss between bernoulli distributions. + """ + def __init__(self): + super(Net2, self).__init__() + self.b = nn.Bernoulli(0.7, dtype=dtype.int32) + + @ms_function + def construct(self, x_): + return self.b('kl_loss', 'Bernoulli', x_) + +class Net3(nn.Cell): + """ + Test class: mean/sd of bernoulli distribution. 
+ """ + def __init__(self): + super(Net3, self).__init__() + self.b = nn.Bernoulli([0.5, 0.5], dtype=dtype.int32) + + @ms_function + def construct(self): + return self.b('mean'), self.b('sd') + +class Net4(nn.Cell): + """ + Test class: log probability of bernoulli distribution. + """ + def __init__(self, shape, seed=0): + super(Net4, self).__init__() + self.b = nn.Bernoulli([0.7, 0.5], seed=seed, dtype=dtype.int32) + self.shape = shape + + @ms_function + def construct(self, probs=None): + return self.b('sample', self.shape, probs) + +def test_pmf(): + """ + Test pmf. + """ + bernoulli_benchmark = stats.bernoulli(0.7) + expect_pmf = bernoulli_benchmark.pmf([0, 1, 0, 1, 1]).astype(np.float32) + pdf = Net() + x_ = Tensor(np.array([0, 1, 0, 1, 1]).astype(np.int32), dtype=dtype.float32) + output = pdf(x_) + tol = 1e-6 + assert (np.abs(output.asnumpy() - expect_pmf) < tol).all() + +def test_log_likelihood(): + """ + Test log_pmf. + """ + bernoulli_benchmark = stats.bernoulli(0.7) + expect_logpmf = bernoulli_benchmark.logpmf([0, 1, 0, 1, 1]).astype(np.float32) + logprob = Net1() + x_ = Tensor(np.array([0, 1, 0, 1, 1]).astype(np.int32), dtype=dtype.float32) + output = logprob(x_) + tol = 1e-6 + assert (np.abs(output.asnumpy() - expect_logpmf) < tol).all() + +def test_kl_loss(): + """ + Test kl_loss. + """ + probs1_a = 0.7 + probs1_b = 0.5 + probs0_a = 1 - probs1_a + probs0_b = 1 - probs1_b + expect_kl_loss = probs1_a * np.log(probs1_a / probs1_b) + probs0_a * np.log(probs0_a / probs0_b) + kl_loss = Net2() + output = kl_loss(Tensor([probs1_b], dtype=dtype.float32)) + tol = 1e-6 + assert (np.abs(output.asnumpy() - expect_kl_loss) < tol).all() + +def test_basics(): + """ + Test mean/standard deviation and probs. 
+ """ + basics = Net3() + mean, sd = basics() + expect_mean = [0.5, 0.5] + assert (mean.asnumpy() == expect_mean).all() + assert (sd.asnumpy() == expect_mean).all() + b = nn.Bernoulli([0.7, 0.5], dtype=dtype.int32) + probs = b.probs() + expect_probs = [0.7, 0.5] + tol = 1e-6 + assert (np.abs(probs.asnumpy() - expect_probs) < tol).all() + +def test_sample(): + """ + Test sample. + """ + shape = (2, 3) + sample = Net4(shape) + output = sample() + assert output.shape == (2, 3, 2) diff --git a/tests/st/ops/ascend/test_distribution/test_normal.py b/tests/st/ops/ascend/test_distribution/test_normal.py new file mode 100644 index 00000000000..52bb1173ee6 --- /dev/null +++ b/tests/st/ops/ascend/test_distribution/test_normal.py @@ -0,0 +1,152 @@ +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""test cases for normal distribution""" +import numpy as np +from scipy import stats +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.common.api import ms_function +from mindspore import dtype + +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + +class Net(nn.Cell): + """ + Test class: probability of normal distribution. 
+ """ + def __init__(self): + super(Net, self).__init__() + self.n = nn.Normal(np.array([3.0]), np.array([[2.0], [4.0]]), dtype=dtype.float32) + + @ms_function + def construct(self, x_): + return self.n('prob', x_) + +class Net1(nn.Cell): + """ + Test class: log probability of normal distribution. + """ + def __init__(self): + super(Net1, self).__init__() + self.n = nn.Normal(np.array([3.0]), np.array([[2.0], [4.0]]), dtype=dtype.float32) + + @ms_function + def construct(self, x_): + return self.n('log_prob', x_) + +class Net2(nn.Cell): + """ + Test class: kl_loss of normal distribution. + """ + def __init__(self): + super(Net2, self).__init__() + self.n = nn.Normal(np.array([3.0]), np.array([4.0]), dtype=dtype.float32) + + @ms_function + def construct(self, x_, y_): + return self.n('kl_loss', 'Normal', x_, y_) + +class Net3(nn.Cell): + """ + Test class: mean/sd of normal distribution. + """ + def __init__(self): + super(Net3, self).__init__() + self.n = nn.Normal(np.array([3.0]), np.array([2.0, 4.0]), dtype=dtype.float32) + + @ms_function + def construct(self): + return self.n('mean'), self.n('sd') + +class Net4(nn.Cell): + """ + Test class: mean/sd of normal distribution. + """ + def __init__(self, shape, seed=0): + super(Net4, self).__init__() + self.n = nn.Normal(np.array([3.0]), np.array([[2.0], [4.0]]), seed=seed, dtype=dtype.float32) + self.shape = shape + + @ms_function + def construct(self, mean=None, sd=None): + return self.n('sample', self.shape, mean, sd) + +def test_pdf(): + """ + Test pdf. + """ + norm_benchmark = stats.norm(np.array([3.0]), np.array([[2.0], [4.0]])) + expect_pdf = norm_benchmark.pdf([1.0, 2.0]).astype(np.float32) + pdf = Net() + output = pdf(Tensor([1.0, 2.0], dtype=dtype.float32)) + tol = 1e-6 + assert (np.abs(output.asnumpy() - expect_pdf) < tol).all() + +def test_log_likelihood(): + """ + Test log_pdf. 
+ """ + norm_benchmark = stats.norm(np.array([3.0]), np.array([[2.0], [4.0]])) + expect_logpdf = norm_benchmark.logpdf([1.0, 2.0]).astype(np.float32) + logprob = Net1() + output = logprob(Tensor([1.0, 2.0], dtype=dtype.float32)) + tol = 1e-6 + assert (np.abs(output.asnumpy() - expect_logpdf) < tol).all() + +def test_kl_loss(): + """ + Test kl_loss. + """ + mean_a = np.array([3.0]).astype(np.float32) + sd_a = np.array([4.0]).astype(np.float32) + + mean_b = np.array([1.0]).astype(np.float32) + sd_b = np.array([1.0]).astype(np.float32) + + diff_log_scale = np.log(sd_a) - np.log(sd_b) + squared_diff = np.square(mean_a / sd_b - mean_b / sd_b) + expect_kl_loss = 0.5 * squared_diff + 0.5 * np.expm1(2 * diff_log_scale) - diff_log_scale + + kl_loss = Net2() + mean = Tensor(mean_b, dtype=dtype.float32) + sd = Tensor(sd_b, dtype=dtype.float32) + output = kl_loss(mean, sd) + tol = 1e-6 + assert (np.abs(output.asnumpy() - expect_kl_loss) < tol).all() + +def test_basics(): + """ + Test mean/standard deviation. + """ + basics = Net3() + mean, sd = basics() + expect_mean = [3.0, 3.0] + expect_sd = [2.0, 4.0] + tol = 1e-6 + assert (np.abs(mean.asnumpy() - expect_mean) < tol).all() + assert (np.abs(sd.asnumpy() - expect_sd) < tol).all() + +def test_sample(): + """ + Test sample. + """ + shape = (2, 3) + seed = 10 + mean = Tensor([2.0], dtype=dtype.float32) + sd = Tensor([2.0, 2.0, 2.0], dtype=dtype.float32) + sample = Net4(shape, seed=seed) + output = sample(mean, sd) + assert output.shape == (2, 3, 3) diff --git a/tests/st/ops/gpu/test_ctcloss_op.py b/tests/st/ops/gpu/test_ctcloss_op.py new file mode 100644 index 00000000000..b9a88e7e70a --- /dev/null +++ b/tests/st/ops/gpu/test_ctcloss_op.py @@ -0,0 +1,119 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops import operations as P +from mindspore.common import dtype as mstype +from mindspore.ops.composite import GradOperation + +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.loss = P.CTCLossV2() + self.div = P.RealDiv() + self.cast = P.Cast() + self.mean = P.ReduceMean() + + def construct(self, probs, label, input_length, label_length): + x, _ = self.loss(probs, label, input_length, label_length) + x = self.div(x, self.cast(label_length, mstype.float32)) + x = self.mean(x) + return x + +class GradData(nn.Cell): + def __init__(self, network): + super(GradData, self).__init__() + self.grad = GradOperation(name="get_all", get_all=True, sens_param=False) + self.network = network + + def construct(self, probs, labels, input_lengths, label_lengths): + return self.grad(self.network)(probs, labels, input_lengths, label_lengths) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_ctcloss(): + probs = Tensor([[[-4.4131, -4.6093, -3.4333, -3.9268, -2.8917, -3.4093, -4.2243, -1.1379, -7.1046, -0.6902], + [-2.5109, -3.3397, -4.9384, -1.2723, -1.1443, -2.4683, -2.6768, -4.1282, -2.7062, -3.1906], + [-2.5092, -1.6392, -2.0864, -4.0059, -1.5610, -2.3223, -2.4816, -2.9922, -3.1412, -2.3311]], + + [[-2.1243, -3.5773, -3.1108, -4.4253, -2.7080, -1.9653, -2.0499, -2.4418, 
-1.8620, -1.5229], + [-2.2479, -3.5128, -1.4189, -2.8701, -1.8562, -2.2752, -2.7019, -2.1865, -2.5634, -2.9869], + [-3.2144, -1.3986, -3.1083, -3.9634, -3.5131, -3.2317, -2.6200, -1.7938, -1.8159, -1.7255]], + + [[-3.1301, -2.1649, -0.9286, -2.9452, -2.5992, -2.0263, -2.9201, -3.2155, -2.8302, -3.3636], + [-1.4661, -3.6311, -2.4781, -4.6180, -2.7308, -1.7019, -1.5570, -2.6012, -4.0788, -2.3073], + [-2.6833, -1.5033, -3.6922, -2.6360, -2.6974, -2.6847, -2.7579, -2.1396, -1.4093, -2.9630]], + + [[-2.0094, -2.3024, -3.3673, -1.0220, -2.8326, -2.2613, -3.0535, -2.9879, -3.7015, -2.4510], + [-1.9071, -3.2603, -2.3229, -2.0572, -4.3450, -2.1284, -2.6306, -1.3824, -2.9815, -2.5061], + [-2.7931, -3.7631, -3.2440, -4.3887, -1.0271, -3.8851, -1.2418, -4.5123, -2.2993, -2.4607]], + + [[-1.5763, -2.7539, -3.6941, -3.8166, -1.2599, -2.6903, -2.5826, -4.8208, -2.9562, -1.6321], + [-3.3031, -3.0087, -1.9982, -1.9081, -3.8731, -2.8764, -2.2485, -2.3808, -1.4283, -2.1625], + [-2.4516, -3.2394, -4.2053, -4.3541, -2.5229, -4.0717, -1.4894, -2.3151, -1.1098, -2.3465]]], + dtype=mstype.float32) + labels = Tensor([9, 4, 6, 4, 7, 1, 4, 6, 6, 8], dtype=mstype.int32) + input_lengths = Tensor([5, 5, 5], dtype=mstype.int32) + label_lengths = Tensor([3, 3, 4], dtype=mstype.int32) + + context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + net = Net() + ctc_loss = net(probs, labels, input_lengths, label_lengths) + expect_loss = [2.4099] + assert np.allclose(ctc_loss.asnumpy(), expect_loss) + + grad = GradData(net)(probs, labels, input_lengths, label_lengths) + expect_grad = [[[8.8442e-05, 1.1065e-03, 3.5867e-03, 2.1896e-03, 6.1646e-03, + 3.6738e-03, 1.6262e-03, 3.5610e-02, 9.1258e-05, -5.4134e-02], + [-3.7523e-03, 3.9386e-03, 7.9623e-04, 3.1132e-02, -6.2954e-02, + 9.4143e-03, 7.6425e-03, 1.7902e-03, 7.4211e-03, 4.5719e-03], + [6.7778e-03, 1.6178e-02, 1.0344e-02, 1.5173e-03, -6.5840e-02, + 8.1707e-03, 6.9674e-03, 4.1814e-03, 3.6026e-03, 8.0991e-03]], + + [[-1.2581e-02, 3.1057e-03, 
4.9517e-03, 1.3301e-03, -2.6320e-02, + 1.5568e-02, 1.4305e-02, 9.6671e-03, 1.7262e-02, -2.7292e-02], + [-1.5566e-02, 3.3126e-03, 2.6887e-02, 6.2993e-03, -3.9716e-02, + 1.1420e-02, 7.4531e-03, -1.4252e-02, 8.5603e-03, 5.6048e-03], + [3.3483e-03, 2.0579e-02, 3.7231e-03, 1.5832e-03, 2.4837e-03, + 3.2909e-03, -7.7267e-02, 1.3861e-02, 1.3558e-02, 1.4840e-02]], + + [[-8.0007e-03, 1.2751e-02, 4.3901e-02, 5.8435e-03, -7.2627e-02, + 1.4647e-02, -8.0584e-03, 4.4595e-03, 6.5557e-03, 5.2891e-04], + [-3.6006e-02, 1.5308e-03, 9.3225e-03, 1.0969e-03, -2.5098e-03, + 2.0260e-02, 2.3419e-02, -3.0053e-02, 1.8809e-03, 1.1059e-02], + [-7.7639e-02, 1.8533e-02, 2.0764e-03, 5.9706e-03, 5.6150e-03, + 5.6868e-03, 5.2854e-03, 9.8085e-03, 2.0360e-02, 4.3053e-03]], + + [[-2.6776e-02, 1.1113e-02, 3.8314e-03, 3.9986e-02, -1.6020e-02, + 1.1579e-02, -4.1635e-02, 5.5992e-03, 2.7429e-03, 9.5786e-03], + [-6.8619e-03, -6.4066e-03, 1.0888e-02, 1.4201e-02, 1.4413e-03, + 1.3225e-02, 8.0039e-03, -4.9191e-02, 5.6352e-03, 9.0651e-03], + [5.1026e-03, 1.9343e-03, 3.2506e-03, 1.0347e-03, 2.9837e-02, + 1.7121e-03, -5.9261e-02, 9.1443e-04, 8.3608e-03, 7.1146e-03]], + + [[-2.0848e-02, 7.0754e-03, 2.7633e-03, 2.4447e-03, 3.1520e-02, + 7.5401e-03, -5.8895e-02, 8.9559e-04, 5.7796e-03, 2.1724e-02], + [-1.3499e-03, -1.0019e-01, 1.5064e-02, 1.6485e-02, 2.3104e-03, + 6.2597e-03, 1.1729e-02, 1.0275e-02, 2.6635e-02, 1.2782e-02], + [7.1796e-03, 3.2656e-03, 1.2430e-03, 1.0712e-03, 6.6856e-03, + 1.4207e-03, 1.8792e-02, 8.2297e-03, -5.5865e-02, 7.9753e-03]]] + assert np.allclose(grad[0].asnumpy(), expect_grad, atol=1e-5) diff --git a/tests/st/ops/gpu/test_dense_op.py b/tests/st/ops/gpu/test_dense_op.py index 220f7ae051d..e9c010ea77d 100644 --- a/tests/st/ops/gpu/test_dense_op.py +++ b/tests/st/ops/gpu/test_dense_op.py @@ -228,6 +228,7 @@ def test_biasadd_3d(): error = np.ones(shape=[3, 4, 8]) * 1.0e-6 context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU") net = BiasAdd() + net.set_grad() result = net(x, b) diff 
= result.asnumpy() - expect assert np.all(diff < error) diff --git a/tests/st/ops/gpu/test_normal.py b/tests/st/ops/gpu/test_normal.py new file mode 100644 index 00000000000..0c4866f6f0a --- /dev/null +++ b/tests/st/ops/gpu/test_normal.py @@ -0,0 +1,56 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import numpy as np + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.common import dtype as mstype +from mindspore.ops import composite as C + +context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + + +class Net(nn.Cell): + def __init__(self, shape, seed=0): + super(Net, self).__init__() + self.shape = shape + self.seed = seed + + def construct(self, mean, stddev): + return C.normal(self.shape, mean, stddev, self.seed) + + +def test_net_1D(): + seed = 10 + shape = (3, 2, 4) + mean = 1.0 + stddev = 1.0 + net = Net(shape, seed) + tmean, tstddev = Tensor(mean, mstype.float32), Tensor(stddev, mstype.float32) + output = net(tmean, tstddev) + assert output.shape == (3, 2, 4) + + +def test_net_ND(): + seed = 10 + shape = (3, 1, 2) + mean = np.array([[[1], [2]], [[3], [4]], [[5], [6]]]).astype(np.float32) + stddev = np.array([1.0]).astype(np.float32) + net = Net(shape, seed) + tmean, tstddev = Tensor(mean, mstype.float32), Tensor(stddev, mstype.float32) + output = net(tmean, 
tstddev) + assert output.shape == (3, 2, 2) diff --git a/tests/st/ops/gpu/test_smoothl1loss_op.py b/tests/st/ops/gpu/test_smoothl1loss_op.py new file mode 100644 index 00000000000..040f404eb02 --- /dev/null +++ b/tests/st/ops/gpu/test_smoothl1loss_op.py @@ -0,0 +1,81 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops import composite as C + +context.set_context(mode=context.GRAPH_MODE, device_target="GPU", save_graphs=True) + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_smoothl1loss(): + np.random.seed(42) + prediction = np.random.randn(20).astype(np.float32) + target = np.random.randn(20).astype(np.float32) + sigma = 1.0 + + net = nn.SmoothL1Loss(sigma) + loss = net(Tensor(prediction), Tensor(target)) + expect = [0.46941718, 0.00382918, 0.16829303, 2.447778, 0.04812113, 0.05953304, + 2.2302065, 0.07672881, 0.00860204, 0.34798968, 0.00956192, 1.818008, + 0.03262977, 0.36599946, 2.047463, 0.2168481, 0.7216947, 1.7739174, + 0.08826803, 1.109165] + assert np.allclose(loss.asnumpy(), expect) + + + +class Grad(nn.Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.grad = C.GradOperation(name="get_all", get_all=True, 
sens_param=True) + self.network = network + + def construct(self, x1, x2, sens): + gout = self.grad(self.network)(x1, x2, sens) + return gout + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_smoothl1loss_grad(): + np.random.seed(42) + prediction = np.random.randn(20).astype(np.float32) + target = np.random.randn(20).astype(np.float32) + sens = np.random.randn(20).astype(np.float32) + sigma = 1.0 + + net = nn.SmoothL1Loss(sigma) + grad = Grad(net) + dx = grad(Tensor(prediction), Tensor(target), Tensor(sens)) + + dx1_expect = [-0.71552587, 0.01499678, -0.06709455, -0.30110368, -0.45868093, + 0.24838912, -0.46063876, 0.41411355, 0.04507046, -1.4708229, + 0.04481723, 0.38508227, -0.17292616, -0.52333146, -1.0309995, + 0.61330026, 0.83921754, -0.3092124, 0.1391843, -0.9755451] + + dx2_expect = [0.71552587, -0.01499678, 0.06709455, 0.30110368, 0.45868093, + -0.24838912, 0.46063876, -0.41411355, -0.04507046, 1.4708229, + -0.04481723, -0.38508227, 0.17292616, 0.52333146, 1.0309995, + -0.61330026, -0.83921754, 0.3092124, -0.1391843, 0.9755451] + + assert np.allclose(dx[0].asnumpy(), dx1_expect) + assert np.allclose(dx[1].asnumpy(), dx2_expect) diff --git a/tests/st/pynative/test_implicit_conversion.py b/tests/st/pynative/test_implicit_conversion.py new file mode 100644 index 00000000000..fce6c24cbb8 --- /dev/null +++ b/tests/st/pynative/test_implicit_conversion.py @@ -0,0 +1,81 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" test implicit conversion """ +import numpy as np + +from mindspore import Tensor + + +def test_float_tensor_and_int_add(): + x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + y = 2 + ret_actual = x + y + ret_expect = Tensor(np.array([[2.1, 2.2, 2.3], [2.4, 2.5, 2.6]], dtype=np.float32)) + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_bool_tensor_and_float_add(): + x = Tensor(np.array([[True, False], [False, True]], dtype=np.bool_)) + y = 3.3 + ret_actual = x + y + ret_expect = Tensor(np.array([[4.3, 3.3], [3.3, 4.3]], dtype=np.float32)) + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_bool_tensor_and_int_add(): + x = Tensor(np.array([[True, False], [False, True]], dtype=np.bool_)) + y = 3 + ret_actual = x + y + ret_expect = Tensor(np.array([[4, 3], [3, 4]], dtype=np.int32)) + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_bool_and_int_tensor_add(): + x = True + y = Tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)) + ret_actual = x + y + ret_expect = Tensor(np.array([[2, 3, 4], [5, 6, 7]], dtype=np.int32)) + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + +def test_float_tensor_and_int_tensor_add(): + x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + y = Tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)) + ret_actual = x + y + ret_expect = Tensor(np.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]], dtype=np.float32)) + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_float_tensor_and_float_tensor_add(): + x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float64)) + y = Tensor(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32)) + ret_actual = x + y + ret_expect = 
Tensor(np.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]], dtype=np.float64)) + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_int_tensor_and_int_tensor_add(): + x = Tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int16)) + y = Tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)) + ret_actual = x + y + ret_expect = Tensor(np.array([[2, 4, 6], [8, 10, 12]], dtype=np.int32)) + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_float_tensor_and_bool_tensors_add(): + x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + y = Tensor(np.array([[True, True, True], [False, False, False]], dtype=np.bool_)) + ret_actual = x + y + ret_expect = Tensor(np.array([[1.1, 1.2, 1.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() diff --git a/tests/st/pynative/test_pynative_hook.py b/tests/st/pynative/test_pynative_hook.py new file mode 100644 index 00000000000..0ce4ba4f697 --- /dev/null +++ b/tests/st/pynative/test_pynative_hook.py @@ -0,0 +1,198 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import pytest +import numpy as np +import mindspore.nn as nn +import mindspore.common.dtype as mstype + +from mindspore import Tensor +from mindspore import context +from mindspore import ParameterTuple +from mindspore.nn import Momentum +from mindspore.nn import WithLossCell +from mindspore.ops import composite as C +from mindspore.ops import operations as P +from mindspore.common.initializer import TruncatedNormal + +context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + + +def weight_variable(): + """weight initial""" + return TruncatedNormal(0.02) + + +def conv(in_channels, out_channels, kernel_size, stride=1, padding=0): + """weight initial for conv layer""" + weight = weight_variable() + return nn.Conv2d(in_channels, out_channels, + kernel_size=kernel_size, stride=stride, padding=padding, + weight_init=weight, has_bias=False, pad_mode="valid") + + +def fc_with_initialize(input_channels, out_channels): + """weight initial for fc layer""" + weight = weight_variable() + bias = weight_variable() + return nn.Dense(input_channels, out_channels, weight, bias) + + +class test_custom_hook_function_base(): + def __init__(self): + pass + + def test_custom_hook_function(self, hook_function, cell_hook_function): + return hook_function, cell_hook_function + + +def cell_hook_function_print_grad(cell_id, grad_input, grad_output): + assert grad_output[0].asnumpy().shape == (32, 6, 14, 14) + assert grad_input[0].asnumpy().shape == (32, 16, 10, 10) + + +def custom_hook_function_print_and_save_grad(grad_out): + assert grad_out[0].asnumpy().shape == (32, 6, 28, 28) + + +class LeNet5(nn.Cell): + def __init__(self, hook_function, cell_hook_function, num_class=10): + super(LeNet5, self).__init__() + self.num_class = num_class + self.batch_size = 32 + self.conv1 = conv(1, 6, 5) + self.conv2 = conv(6, 16, 5) + self.conv1.register_backward_hook(cell_hook_function) + self.fc1 = 
fc_with_initialize(16 * 5 * 5, 120) + self.fc2 = fc_with_initialize(120, 84) + self.fc3 = fc_with_initialize(84, self.num_class) + self.relu = nn.ReLU() + self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2) + self.reshape = P.Reshape() + self.hook = P.HookBackward(hook_function) + + def construct(self, x): + x = self.conv1(x) + x = self.relu(x) + x = self.hook(x) + x = self.max_pool2d(x) + x = self.conv2(x) + x = self.relu(x) + x = self.max_pool2d(x) + x = self.reshape(x, (self.batch_size, -1)) + x = self.fc1(x) + x = self.relu(x) + x = self.fc2(x) + x = self.relu(x) + x = self.fc3(x) + return x + + +class GradWrap(nn.Cell): + """ GradWrap definition """ + def __init__(self, network): + super(GradWrap, self).__init__(auto_prefix=False) + self.network = network + self.weights = ParameterTuple(filter(lambda x: x.requires_grad, network.get_parameters())) + + def construct(self, x, label): + weights = self.weights + return C.GradOperation('get_by_list', get_by_list=True)(self.network, weights)(x, label) + + +class test_custom_cell_base(): + def __init__(self): + pass + + def test_custom_cell_function(self, cell): + return cell + + +class MulAdd(nn.Cell): + def __init__(self): + super(MulAdd, self).__init__() + + def construct(self, x, y): + return 2 * x + y + + def bprop(self, x, y, out, dout): + assert x.asnumpy() == 1.0 + assert y.asnumpy() == 2.0 + assert out.asnumpy() == 4.0 + assert dout.asnumpy() == 1.0 + return dout, y + + +class Ms_Cell(nn.Cell): + def __init__(self): + super(Ms_Cell, self).__init__() + self.relu = P.ReLU() + + def construct(self, x): + return self.relu(x) + + def bprop(self, x, out, dout): + dout = Tensor(np.ones([5, 5]).astype(np.float32)) + assert dout.shape == (5, 5) + return dout + + +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +def test_pynative_lenet_train_hook_function_print_and_save_grad(): + hook = test_custom_hook_function_base() + function = 
hook.test_custom_hook_function(custom_hook_function_print_and_save_grad, + cell_hook_function_print_grad) + net = LeNet5(hook_function=function[0], cell_hook_function=function[1]) + optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.1, 0.9) + criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=False) + net_with_criterion = WithLossCell(net, criterion) + train_network = GradWrap(net_with_criterion) + train_network.set_train() + + input_data = Tensor(np.ones([net.batch_size, 1, 32, 32]).astype(np.float32) * 0.01) + label = Tensor(np.ones([net.batch_size, net.num_class]).astype(np.float32)) + output = net(Tensor(input_data)) + criterion(output, label) + grads = train_network(input_data, label) + success = optimizer(grads) + assert success + + +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +def test_pynative_custom_bprop_and_Cell_MulAdd(): + custom_cell = test_custom_cell_base() + mul_add = custom_cell.test_custom_cell_function(MulAdd()) + mul_add.bprop_debug = True + C.grad_all(mul_add)(Tensor(1, mstype.float32), Tensor(2, mstype.float32)) + assert C.grad_all(mul_add)(Tensor(1, mstype.float32), Tensor(2, mstype.float32)) == \ + (Tensor(1.0, mstype.float32), Tensor(2.0, mstype.float32)) + + +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +def test_pynative_custom_bprop_and_Cell_Ms_Cell(): + custom_cell = test_custom_cell_base() + ms_Cell = custom_cell.test_custom_cell_function(Ms_Cell()) + ms_Cell.bprop_debug = True + assert C.grad_all(ms_Cell)(Tensor(1, mstype.float32)) == (Tensor(1.0, mstype.float32),) + \ No newline at end of file diff --git a/tests/st/pynative/test_ascend_lenet.py b/tests/st/pynative/test_pynative_lenet.py similarity index 98% rename from tests/st/pynative/test_ascend_lenet.py rename to tests/st/pynative/test_pynative_lenet.py index 
021c71d9cd3..c6166d05171 100644 --- a/tests/st/pynative/test_ascend_lenet.py +++ b/tests/st/pynative/test_pynative_lenet.py @@ -157,4 +157,5 @@ def test_ascend_pynative_lenet(): total_time = total_time + cost_time print("======epoch: ", epoch, " loss: ", loss_output.asnumpy(), " cost time: ", cost_time) - assert loss_output.asnumpy() < 0.1 + assert loss_output.asnumpy() < 0.004 + assert loss_output.asnumpy() > 0.003 diff --git a/tests/st/pynative/test_pynative_resnet50.py b/tests/st/pynative/test_pynative_resnet50.py new file mode 100644 index 00000000000..de9ecebb9cf --- /dev/null +++ b/tests/st/pynative/test_pynative_resnet50.py @@ -0,0 +1,432 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import time +import random +import numpy as np +import pytest + +import mindspore.common.dtype as mstype +import mindspore.dataset as ds +import mindspore.dataset.transforms.c_transforms as C +import mindspore.dataset.transforms.vision.c_transforms as vision +import mindspore.nn as nn +import mindspore.ops.functional as F + +from mindspore import Tensor +from mindspore import context +from mindspore import ParameterTuple +from mindspore.nn import Cell +from mindspore.ops import operations as P +from mindspore.ops import composite as CP +from mindspore.nn.optim.momentum import Momentum +from mindspore.common.initializer import initializer +from mindspore.nn.wrap.cell_wrapper import WithLossCell + +random.seed(1) +np.random.seed(1) +ds.config.set_seed(1) + + +def weight_variable(shape): + return initializer('XavierUniform', shape=shape, dtype=mstype.float32) + + +def weight_variable_uniform(shape): + return initializer('Uniform', shape=shape, dtype=mstype.float32) + + +def weight_variable_0(shape): + zeros = np.zeros(shape).astype(np.float32) + return Tensor(zeros) + + +def weight_variable_1(shape): + ones = np.ones(shape).astype(np.float32) + return Tensor(ones) + + +def conv3x3(in_channels, out_channels, stride=1, padding=0): + """3x3 convolution""" + weight_shape = (out_channels, in_channels, 3, 3) + weight = weight_variable(weight_shape) + return nn.Conv2d(in_channels, out_channels, + kernel_size=3, stride=stride, padding=padding, weight_init=weight, has_bias=False, pad_mode="same") + + +def conv1x1(in_channels, out_channels, stride=1, padding=0): + """1x1 convolution""" + weight_shape = (out_channels, in_channels, 1, 1) + weight = weight_variable(weight_shape) + return nn.Conv2d(in_channels, out_channels, + kernel_size=1, stride=stride, padding=padding, weight_init=weight, has_bias=False, pad_mode="same") + + +def conv7x7(in_channels, out_channels, stride=1, padding=0): + """7x7 
convolution""" + weight_shape = (out_channels, in_channels, 7, 7) + weight = weight_variable(weight_shape) + return nn.Conv2d(in_channels, out_channels, + kernel_size=7, stride=stride, padding=padding, weight_init=weight, has_bias=False, pad_mode="same") + + +def bn_with_initialize(out_channels): + shape = (out_channels) + mean = weight_variable_0(shape) + var = weight_variable_1(shape) + beta = weight_variable_0(shape) + gamma = weight_variable_uniform(shape) + bn = nn.BatchNorm2d(out_channels, momentum=0.99, eps=0.00001, gamma_init=gamma, + beta_init=beta, moving_mean_init=mean, moving_var_init=var) + return bn + + +def bn_with_initialize_last(out_channels): + shape = (out_channels) + mean = weight_variable_0(shape) + var = weight_variable_1(shape) + beta = weight_variable_0(shape) + gamma = weight_variable_uniform(shape) + bn = nn.BatchNorm2d(out_channels, momentum=0.99, eps=0.00001, gamma_init=gamma, + beta_init=beta, moving_mean_init=mean, moving_var_init=var) + return bn + + +def fc_with_initialize(input_channels, out_channels): + weight_shape = (out_channels, input_channels) + weight = weight_variable(weight_shape) + bias_shape = (out_channels) + bias = weight_variable_uniform(bias_shape) + return nn.Dense(input_channels, out_channels, weight, bias) + + +class ResidualBlock(nn.Cell): + expansion = 4 + + def __init__(self, + in_channels, + out_channels, + stride=1): + super(ResidualBlock, self).__init__() + + out_chls = out_channels // self.expansion + self.conv1 = conv1x1(in_channels, out_chls, stride=stride, padding=0) + self.bn1 = bn_with_initialize(out_chls) + + self.conv2 = conv3x3(out_chls, out_chls, stride=1, padding=0) + self.bn2 = bn_with_initialize(out_chls) + + self.conv3 = conv1x1(out_chls, out_channels, stride=1, padding=0) + self.bn3 = bn_with_initialize_last(out_channels) + + self.relu = P.ReLU() + self.add = P.TensorAdd() + + def construct(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = 
self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + out = self.add(out, identity) + out = self.relu(out) + + return out + + +class ResidualBlockWithDown(nn.Cell): + expansion = 4 + + def __init__(self, + in_channels, + out_channels, + stride=1, + down_sample=False): + super(ResidualBlockWithDown, self).__init__() + + out_chls = out_channels // self.expansion + self.conv1 = conv1x1(in_channels, out_chls, stride=stride, padding=0) + self.bn1 = bn_with_initialize(out_chls) + + self.conv2 = conv3x3(out_chls, out_chls, stride=1, padding=0) + self.bn2 = bn_with_initialize(out_chls) + + self.conv3 = conv1x1(out_chls, out_channels, stride=1, padding=0) + self.bn3 = bn_with_initialize_last(out_channels) + + self.relu = P.ReLU() + self.downSample = down_sample + + self.conv_down_sample = conv1x1(in_channels, out_channels, stride=stride, padding=0) + self.bn_down_sample = bn_with_initialize(out_channels) + self.add = P.TensorAdd() + + def construct(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + identity = self.conv_down_sample(identity) + identity = self.bn_down_sample(identity) + + out = self.add(out, identity) + out = self.relu(out) + + return out + + +class MakeLayer0(nn.Cell): + + def __init__(self, block, in_channels, out_channels, stride): + super(MakeLayer0, self).__init__() + self.a = ResidualBlockWithDown(in_channels, out_channels, stride=1, down_sample=True) + self.b = block(out_channels, out_channels, stride=stride) + self.c = block(out_channels, out_channels, stride=1) + + def construct(self, x): + x = self.a(x) + x = self.b(x) + x = self.c(x) + + return x + + +class MakeLayer1(nn.Cell): + + def __init__(self, block, in_channels, out_channels, stride): + super(MakeLayer1, self).__init__() + self.a = ResidualBlockWithDown(in_channels, 
out_channels, stride=stride, down_sample=True) + self.b = block(out_channels, out_channels, stride=1) + self.c = block(out_channels, out_channels, stride=1) + self.d = block(out_channels, out_channels, stride=1) + + def construct(self, x): + x = self.a(x) + x = self.b(x) + x = self.c(x) + x = self.d(x) + + return x + + +class MakeLayer2(nn.Cell): + + def __init__(self, block, in_channels, out_channels, stride): + super(MakeLayer2, self).__init__() + self.a = ResidualBlockWithDown(in_channels, out_channels, stride=stride, down_sample=True) + self.b = block(out_channels, out_channels, stride=1) + self.c = block(out_channels, out_channels, stride=1) + self.d = block(out_channels, out_channels, stride=1) + self.e = block(out_channels, out_channels, stride=1) + self.f = block(out_channels, out_channels, stride=1) + + def construct(self, x): + x = self.a(x) + x = self.b(x) + x = self.c(x) + x = self.d(x) + x = self.e(x) + x = self.f(x) + + return x + + +class MakeLayer3(nn.Cell): + + def __init__(self, block, in_channels, out_channels, stride): + super(MakeLayer3, self).__init__() + self.a = ResidualBlockWithDown(in_channels, out_channels, stride=stride, down_sample=True) + self.b = block(out_channels, out_channels, stride=1) + self.c = block(out_channels, out_channels, stride=1) + + def construct(self, x): + x = self.a(x) + x = self.b(x) + x = self.c(x) + + return x + + +class ResNet(nn.Cell): + + def __init__(self, block, num_classes=100, batch_size=32): + super(ResNet, self).__init__() + self.batch_size = batch_size + self.num_classes = num_classes + + self.conv1 = conv7x7(3, 64, stride=2, padding=0) + + self.bn1 = bn_with_initialize(64) + self.relu = P.ReLU() + self.maxpool = P.MaxPoolWithArgmax(ksize=3, strides=2, padding="SAME") + + self.layer1 = MakeLayer0(block, in_channels=64, out_channels=256, stride=1) + self.layer2 = MakeLayer1(block, in_channels=256, out_channels=512, stride=2) + self.layer3 = MakeLayer2(block, in_channels=512, out_channels=1024, stride=2) + 
self.layer4 = MakeLayer3(block, in_channels=1024, out_channels=2048, stride=2) + + self.pool = P.ReduceMean(keep_dims=True) + self.squeeze = P.Squeeze(axis=(2, 3)) + self.fc = fc_with_initialize(512 * block.expansion, num_classes) + + def construct(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x)[0] + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.pool(x, (2, 3)) + x = self.squeeze(x) + x = self.fc(x) + return x + + +def resnet50(batch_size, num_classes): + return ResNet(ResidualBlock, num_classes, batch_size) + + +def create_dataset(repeat_num=1, training=True, batch_size=32): + data_home = "/home/workspace/mindspore_dataset" + data_dir = data_home + "/cifar-10-batches-bin" + if not training: + data_dir = data_home + "/cifar-10-verify-bin" + data_set = ds.Cifar10Dataset(data_dir) + + resize_height = 224 + resize_width = 224 + rescale = 1.0 / 255.0 + shift = 0.0 + + # define map operations + random_crop_op = vision.RandomCrop((32, 32), (4, 4, 4, 4)) # padding_mode default CONSTANT + random_horizontal_op = vision.RandomHorizontalFlip() + # interpolation default BILINEAR + resize_op = vision.Resize((resize_height, resize_width)) + rescale_op = vision.Rescale(rescale, shift) + normalize_op = vision.Normalize((0.4465, 0.4822, 0.4914), (0.2010, 0.1994, 0.2023)) + changeswap_op = vision.HWC2CHW() + type_cast_op = C.TypeCast(mstype.int32) + + c_trans = [] + if training: + c_trans = [random_crop_op, random_horizontal_op] + c_trans += [resize_op, rescale_op, normalize_op, + changeswap_op] + + # apply map operations on images + data_set = data_set.map(input_columns="label", operations=type_cast_op) + data_set = data_set.map(input_columns="image", operations=c_trans) + + # apply shuffle operations + data_set = data_set.shuffle(buffer_size=1000) + + # apply batch operations + data_set = data_set.batch(batch_size=batch_size, drop_remainder=True) + + # apply repeat operations + data_set = 
data_set.repeat(repeat_num) + + return data_set + + +class CrossEntropyLoss(nn.Cell): + def __init__(self): + super(CrossEntropyLoss, self).__init__() + self.cross_entropy = P.SoftmaxCrossEntropyWithLogits() + self.mean = P.ReduceMean() + self.one_hot = P.OneHot() + self.one = Tensor(1.0, mstype.float32) + self.zero = Tensor(0.0, mstype.float32) + + def construct(self, logits, label): + label = self.one_hot(label, F.shape(logits)[1], self.one, self.zero) + loss = self.cross_entropy(logits, label)[0] + loss = self.mean(loss, (-1,)) + return loss + + +class GradWrap(Cell): + """ GradWrap definition """ + + def __init__(self, network): + super(GradWrap, self).__init__() + self.network = network + self.weights = ParameterTuple(network.trainable_params()) + + def construct(self, x, label): + weights = self.weights + return CP.grad_by_list(self.network, weights)(x, label) + + +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +def test_pynative_resnet50(): + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + + batch_size = 32 + num_classes = 10 + net = resnet50(batch_size, num_classes) + criterion = CrossEntropyLoss() + optimizer = Momentum(learning_rate=0.01, momentum=0.9, + params=filter(lambda x: x.requires_grad, net.get_parameters())) + + net_with_criterion = WithLossCell(net, criterion) + net_with_criterion.set_grad() + train_network = GradWrap(net_with_criterion) + train_network.set_train() + + step = 0 + max_step = 20 + data_set = create_dataset(repeat_num=1, training=True, batch_size=batch_size) + for element in data_set.create_dict_iterator(): + step = step + 1 + if step > max_step: + break + start_time = time.time() + input_data = Tensor(element["image"]) + input_label = Tensor(element["label"]) + loss_output = net_with_criterion(input_data, input_label) + grads = train_network(input_data, input_label) + optimizer(grads) + end_time = time.time() + cost_time 
= end_time - start_time + print("======step: ", step, " loss: ", loss_output.asnumpy(), " cost time: ", cost_time) + if step > 1: + assert cost_time < 0.3 + \ No newline at end of file diff --git a/tests/ut/cpp/CMakeLists.txt b/tests/ut/cpp/CMakeLists.txt index dcc798165b7..880a2810378 100644 --- a/tests/ut/cpp/CMakeLists.txt +++ b/tests/ut/cpp/CMakeLists.txt @@ -17,6 +17,7 @@ message("PYTHON_INCLUDE_DIRS = ${PYTHON_INCLUDE_DIRS}") message("PYTHON_LIBRARIES = ${PYTHON_LIBRARIES}") include_directories(${PYTHON_INCLUDE_DIRS}) include_directories(${MS_CCSRC_PATH}) +include_directories(${CMAKE_SOURCE_DIR}/mindspore/core) include_directories(${CMAKE_CURRENT_SOURCE_DIR}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/stub/runtime/) include_directories(${CMAKE_BINARY_DIR}) @@ -27,12 +28,20 @@ link_directories(${MS_CCSRC_BUILD_PATH}) if(ENABLE_MINDDATA) add_definitions(-D ENABLE_MINDDATA) - link_directories(${MS_CCSRC_BUILD_PATH}/dataset) - link_directories(${MS_CCSRC_BUILD_PATH}/mindrecord) + link_directories(${MS_CCSRC_BUILD_PATH}/minddata/dataset) + link_directories(${MS_CCSRC_BUILD_PATH}/minddata/mindrecord) endif() # fetch ut test files if(ENABLE_MINDDATA) - file(GLOB_RECURSE UT_SRCS ./*.cc) + file(GLOB_RECURSE UT_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ./*.cc) + if(NOT ENABLE_PYTHON) + set(PYTHON_RELATED_SRCS + dataset/filter_op_test.cc + dataset/voc_op_test.cc + dataset/manifest_op_test.cc + ) + list(REMOVE_ITEM UT_SRCS ${PYTHON_RELATED_SRCS}) + endif() else() file(GLOB_RECURSE TEMP_UT_SRCS ./*.cc) foreach(OBJ ${TEMP_UT_SRCS}) @@ -43,78 +52,83 @@ else() endif() file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "../../../mindspore/ccsrc/ir/*.cc" + "../../../mindspore/core/base/*.cc" + "../../../mindspore/core/abstract/*.cc" + "../../../mindspore/core/ir/*.cc" "../../../mindspore/ccsrc/common/*.cc" "../../../mindspore/ccsrc/utils/*.cc" - "../../../mindspore/ccsrc/parallel/*.cc" - "../../../mindspore/ccsrc/pipeline/parse/*.cc" - 
"../../../mindspore/ccsrc/pipeline/static_analysis/*.cc" - "../../../mindspore/ccsrc/pipeline/pipeline.cc" - "../../../mindspore/ccsrc/pipeline/resource.cc" - "../../../mindspore/ccsrc/pipeline/pass.cc" - "../../../mindspore/ccsrc/pipeline/action.cc" - "../../../mindspore/ccsrc/pipeline/validator.cc" - "../../../mindspore/ccsrc/pipeline/remove_value_node_dup.cc" - "../../../mindspore/ccsrc/optimizer/*.cc" + "../../../mindspore/ccsrc/pipeline/jit/parse/*.cc" + "../../../mindspore/ccsrc/pipeline/jit/static_analysis/*.cc" + "../../../mindspore/ccsrc/pipeline/jit/pipeline.cc" + "../../../mindspore/ccsrc/pipeline/jit/resource.cc" + "../../../mindspore/ccsrc/pipeline/jit/pass.cc" + "../../../mindspore/ccsrc/pipeline/jit/action.cc" + "../../../mindspore/ccsrc/pipeline/jit/validator.cc" + "../../../mindspore/ccsrc/pipeline/jit/remove_value_node_dup.cc" + "../../../mindspore/ccsrc/frontend/optimizer/*.cc" + "../../../mindspore/ccsrc/frontend/parallel/*.cc" "../../../mindspore/ccsrc/debug/*.cc" - "../../../mindspore/ccsrc/operator/*.cc" - "../../../mindspore/ccsrc/transform/*.cc" - "../../../mindspore/ccsrc/session/anf_runtime_algorithm.cc" - "../../../mindspore/ccsrc/session/ascend_session.cc" - "../../../mindspore/ccsrc/session/ascend_control_parser.cc" - "../../../mindspore/ccsrc/session/kernel_graph.cc" - "../../../mindspore/ccsrc/session/session_basic.cc" - "../../../mindspore/ccsrc/session/session_factory.cc" + "../../../mindspore/ccsrc/frontend/operator/*.cc" + "../../../mindspore/ccsrc/transform/graph_ir/*.cc" + "../../../mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc" + "../../../mindspore/ccsrc/backend/session/ascend_session.cc" + "../../../mindspore/ccsrc/backend/session/ascend_control_parser.cc" + "../../../mindspore/ccsrc/backend/session/kernel_graph.cc" + "../../../mindspore/ccsrc/backend/session/session_basic.cc" + "../../../mindspore/ccsrc/backend/session/session_factory.cc" "../../../mindspore/ccsrc/vm/*.cc" - 
"../../../mindspore/ccsrc/pynative/*.cc" + "../../../mindspore/ccsrc/pipeline/pynative/*.cc" "../../../mindspore/ccsrc/pybind_api/*.cc" - "../../../mindspore/ccsrc/kernel/akg/*.cc" - "../../../mindspore/ccsrc/kernel/kash/*.cc" - "../../../mindspore/ccsrc/kernel/cce/*.cc" - "../../../mindspore/ccsrc/kernel/rts/*.cc" - "../../../mindspore/ccsrc/kernel/hccl/*.cc" - "../../../mindspore/ccsrc/kernel/kernel_query.cc" - "../../../mindspore/ccsrc/kernel/kernel_build_info.cc" - "../../../mindspore/ccsrc/pre_activate/ascend/*.cc" - "../../../mindspore/ccsrc/pre_activate/common/*.cc" - "../../../mindspore/ccsrc/pre_activate/gpu/*.cc" - "../../../mindspore/ccsrc/pre_activate/mem_reuse/*.cc" - "../../../mindspore/ccsrc/pre_activate/pass/*.cc" - "../../../mindspore/ccsrc/kernel/aicpu/aicpu_kernel_metadata.cc" - "../../../mindspore/ccsrc/kernel/rts/rt_kernel_info.cc" - "../../../mindspore/ccsrc/kernel/common_utils.cc" - "../../../mindspore/ccsrc/kernel/oplib/*.cc" - "../../../mindspore/ccsrc/kernel/tbe/*.cc" - "../../../mindspore/ccsrc/device/kernel_runtime.cc" - "../../../mindspore/ccsrc/device/memory_manager.cc" - "../../../mindspore/ccsrc/device/kernel_runtime_manager.cc" - "../../../mindspore/ccsrc/device/kernel_info.cc" - "../../../mindspore/ccsrc/device/ascend/profiling/*.cc" - "../../../mindspore/ccsrc/device/ascend/kernel_select_ascend.cc" - "../../../mindspore/ccsrc/device/ascend/kernel_select_graph_kernel.cc" - "../../../mindspore/ccsrc/device/convert_tensor_utils.cc" - "../../../mindspore/ccsrc/device/ascend/kernel_build_ascend.cc" - "../../../mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc" - "../../../mindspore/ccsrc/device/ascend/ascend_memory_manager.cc" - "../../../mindspore/ccsrc/device/ascend/ascend_device_address.cc" - "../../../mindspore/ccsrc/device/ascend/ascend_memory_pool.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/akg/*.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/kash/*.cc" + 
"../../../mindspore/ccsrc/backend/kernel_compiler/rts/*.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/hccl/*.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/kernel_query.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/kernel_build_info.cc" + "../../../mindspore/ccsrc/backend/optimizer/ascend/*.cc" + "../../../mindspore/ccsrc/backend/optimizer/common/*.cc" + "../../../mindspore/ccsrc/backend/optimizer/gpu/*.cc" + "../../../mindspore/ccsrc/backend/optimizer/mem_reuse/*.cc" + "../../../mindspore/ccsrc/backend/optimizer/pass/*.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_metadata.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/rts/rt_kernel_info.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/common_utils.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/oplib/*.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/tbe/*.cc" + "../../../mindspore/ccsrc/runtime/device/kernel_runtime.cc" + "../../../mindspore/ccsrc/runtime/device/memory_manager.cc" + "../../../mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc" + "../../../mindspore/ccsrc/runtime/device/kernel_info.cc" + "../../../mindspore/ccsrc/runtime/device/ascend/profiling/*.cc" + "../../../mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc" + "../../../mindspore/ccsrc/runtime/device/ascend/kernel_select_graph_kernel.cc" + "../../../mindspore/ccsrc/runtime/device/convert_tensor_utils.cc" + "../../../mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc" + "../../../mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc" + "../../../mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc" + "../../../mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc" + "../../../mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.cc" "../../../mindspore/ccsrc/predict/generator/utils/ir_model_util.cc" "../../../mindspore/ccsrc/predict/predict.cc" "../../../mindspore/ccsrc/predict/converter/*.cc" 
"../../../mindspore/ccsrc/predict/converter/attr_utils/*.cc" "../../../mindspore/ccsrc/predict/converter/lite_model/*.cc" "../../../mindspore/ccsrc/predict/converter/lite_model/operations/*.cc" - "../../../mindspore/ccsrc/kernel/cpu/cpu_kernel.cc" - "../../../mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.cc" - "../../../mindspore/ccsrc/kernel/cpu/sparse_apply_adam_cpu_kernel.cc" - "../../../mindspore/ccsrc/kernel/cpu/sparse_apply_ftrl_cpu_kernel.cc" - "../../../mindspore/ccsrc/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.cc" - "../../../mindspore/ccsrc/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_lazy_adam_cpu_kernel.cc" + "../../../mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc" ) list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/debug/dump_proto.cc") -list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ir/lite/tensor.cc") -list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc") +list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/core/ir/lite/tensor.cc") +list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc") +list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/frontend/parallel/ps/util.cc") +list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/frontend/parallel/ps/scheduler.cc") +list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/frontend/parallel/ps/optimizer_info.cc") +list(REMOVE_ITEM MINDSPORE_SRC_LIST 
"../../../mindspore/ccsrc/frontend/parallel/ps/optimizer_info_builder.cc") list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/anf_ir.pb.cc") list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/node_strategy.pb.cc") list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc") diff --git a/tests/ut/cpp/pipeline/static_analysis/abstract_test.cc b/tests/ut/cpp/abstract/abstract_test.cc similarity index 90% rename from tests/ut/cpp/pipeline/static_analysis/abstract_test.cc rename to tests/ut/cpp/abstract/abstract_test.cc index 93baf86c3ea..2e3a2a8d1ac 100644 --- a/tests/ut/cpp/pipeline/static_analysis/abstract_test.cc +++ b/tests/ut/cpp/abstract/abstract_test.cc @@ -18,13 +18,13 @@ #include "common/common_test.h" -#include "pipeline/static_analysis/static_analysis.h" -#include "pipeline/static_analysis/utils.h" -#include "pipeline/static_analysis/prim.h" -#include "pipeline/parse/parse.h" -#include "pipeline/parse/resolve.h" -#include "pipeline/parse/data_converter.h" -#include "operator/ops.h" +#include "pipeline/jit/static_analysis/static_analysis.h" +#include "abstract/utils.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "pipeline/jit/parse/parse.h" +#include "pipeline/jit/parse/resolve.h" +#include "pipeline/jit/parse/data_converter.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace abstract { diff --git a/tests/ut/cpp/pipeline/static_analysis/dshape_test.cc b/tests/ut/cpp/abstract/dshape_test.cc similarity index 97% rename from tests/ut/cpp/pipeline/static_analysis/dshape_test.cc rename to tests/ut/cpp/abstract/dshape_test.cc index ae18f7730b7..da0e9ed3eef 100644 --- a/tests/ut/cpp/pipeline/static_analysis/dshape_test.cc +++ b/tests/ut/cpp/abstract/dshape_test.cc @@ -18,7 +18,7 @@ #include "common/common_test.h" -#include "pipeline/static_analysis/dshape.h" +#include "abstract/dshape.h" #include "utils/log_adapter.h" namespace mindspore { diff --git 
a/tests/ut/cpp/pipeline/static_analysis/utils_test.cc b/tests/ut/cpp/abstract/utils_test.cc similarity index 95% rename from tests/ut/cpp/pipeline/static_analysis/utils_test.cc rename to tests/ut/cpp/abstract/utils_test.cc index dceef71b024..33cada28d71 100644 --- a/tests/ut/cpp/pipeline/static_analysis/utils_test.cc +++ b/tests/ut/cpp/abstract/utils_test.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "pipeline/static_analysis/utils.h" +#include "abstract/utils.h" #include "common/common_test.h" -#include "pipeline/static_analysis/static_analysis.h" +#include "pipeline/jit/static_analysis/static_analysis.h" namespace mindspore { namespace abstract { diff --git a/tests/ut/cpp/ir/base_test.cc b/tests/ut/cpp/base/base_test.cc similarity index 99% rename from tests/ut/cpp/ir/base_test.cc rename to tests/ut/cpp/base/base_test.cc index 0b4e8a637b4..71a7999e0f2 100644 --- a/tests/ut/cpp/ir/base_test.cc +++ b/tests/ut/cpp/base/base_test.cc @@ -17,7 +17,7 @@ #include "common/common_test.h" #include "utils/any.h" -#include "ir/base.h" +#include "base/base.h" #include "ir/anf.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/common/backend_common_test.cc b/tests/ut/cpp/common/backend_common_test.cc index 060b170a8c5..37103492988 100644 --- a/tests/ut/cpp/common/backend_common_test.cc +++ b/tests/ut/cpp/common/backend_common_test.cc @@ -20,11 +20,11 @@ #include #include "utils/log_adapter.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "debug/anf_ir_dump.h" -#include "session/ascend_session.h" -#include "pipeline/resource.h" -#include "pipeline/action.h" +#include "backend/session/ascend_session.h" +#include "pipeline/jit/resource.h" +#include "pipeline/jit/action.h" #include "ir/anf.h" #include "ir/manager.h" diff --git a/tests/ut/cpp/common/backend_common_test.h b/tests/ut/cpp/common/backend_common_test.h index fb3334182af..f5bfc9d6dd4 100644 --- 
a/tests/ut/cpp/common/backend_common_test.h +++ b/tests/ut/cpp/common/backend_common_test.h @@ -17,7 +17,7 @@ #define TESTS_UT_CPP_COMMON_UT_BACKEND_COMMON_H_ #include "common/common_test.h" #include "utils/context/ms_context.h" -#include "session/kernel_graph.h" +#include "backend/session/kernel_graph.h" namespace mindspore { class BackendCommon : public UT::Common { diff --git a/tests/ut/cpp/common/py_func_graph_fetcher.h b/tests/ut/cpp/common/py_func_graph_fetcher.h index 98552a96b54..d864842760e 100644 --- a/tests/ut/cpp/common/py_func_graph_fetcher.h +++ b/tests/ut/cpp/common/py_func_graph_fetcher.h @@ -22,8 +22,8 @@ #include "ir/primitive.h" #include "ir/manager.h" #include "ir/func_graph.h" -#include "pipeline/parse/parse_base.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse_base.h" +#include "pipeline/jit/parse/parse.h" #include "./common.h" namespace UT { diff --git a/tests/ut/cpp/common/test_main.cc b/tests/ut/cpp/common/test_main.cc index f0cfc1778c3..fa456ed2602 100644 --- a/tests/ut/cpp/common/test_main.cc +++ b/tests/ut/cpp/common/test_main.cc @@ -16,8 +16,8 @@ #include #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "pipeline/pipeline.h" -#include "pipeline/resource.h" +#include "pipeline/jit/pipeline.h" +#include "pipeline/jit/resource.h" namespace mindspore { extern void InitSubModulesLogLevel(); diff --git a/tests/ut/cpp/dataset/CMakeLists.txt b/tests/ut/cpp/dataset/CMakeLists.txt index 129864ca0f7..8bbf42a6404 100644 --- a/tests/ut/cpp/dataset/CMakeLists.txt +++ b/tests/ut/cpp/dataset/CMakeLists.txt @@ -11,6 +11,7 @@ SET(DE_UT_SRCS interrupt_test.cc image_folder_op_test.cc buddy_test.cc + bounding_box_augment_op_test.cc arena_test.cc btree_test.cc center_crop_op_test.cc @@ -35,20 +36,26 @@ SET(DE_UT_SRCS project_op_test.cc queue_test.cc random_crop_op_test.cc + random_crop_with_bbox_op_test.cc random_crop_decode_resize_op_test.cc random_crop_and_resize_op_test.cc + 
random_crop_and_resize_with_bbox_op_test.cc random_color_adjust_op_test.cc random_horizontal_flip_op_test.cc + random_horizontal_flip_with_bbox_test.cc random_resize_op_test.cc + random_resize_with_bbox_op_test.cc random_rotation_op_test.cc random_vertical_flip_op_test.cc + random_vertical_flip_with_bbox_op_test.cc rename_op_test.cc repeat_op_test.cc skip_op_test.cc rescale_op_test.cc resize_bilinear_op_test.cc resize_op_test.cc - schema_test.cc + resize_with_bbox_op_test.cc + schema_test.cc shuffle_op_test.cc stand_alone_samplers_test.cc status_test.cc @@ -83,6 +90,8 @@ SET(DE_UT_SRCS concatenate_op_test.cc cyclic_array_test.cc perf_data_test.cc + c_api_test.cc + tensor_op_fusion_pass_test.cc ) add_executable(de_ut_tests ${DE_UT_SRCS}) diff --git a/tests/ut/cpp/dataset/arena_test.cc b/tests/ut/cpp/dataset/arena_test.cc index e8698ad979b..10d27b51c66 100644 --- a/tests/ut/cpp/dataset/arena_test.cc +++ b/tests/ut/cpp/dataset/arena_test.cc @@ -15,7 +15,7 @@ */ #include -#include "dataset/util/arena.h" +#include "minddata/dataset/util/arena.h" #include "common/common.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/batch_op_test.cc b/tests/ut/cpp/dataset/batch_op_test.cc index a04da06e4ed..3e1f3c0b320 100644 --- a/tests/ut/cpp/dataset/batch_op_test.cc +++ b/tests/ut/cpp/dataset/batch_op_test.cc @@ -16,14 +16,14 @@ #include #include #include -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "common/utils.h" #include "gtest/gtest.h" -#include "dataset/core/global_context.h" +#include "minddata/dataset/core/global_context.h" #include "utils/log_adapter.h" #include "securec.h" -#include "dataset/util/status.h" +#include "minddata/dataset/util/status.h" namespace common = mindspore::common; namespace de = mindspore::dataset; diff --git a/tests/ut/cpp/dataset/bit_functions_test.cc b/tests/ut/cpp/dataset/bit_functions_test.cc index 02b6a25f767..cf1c1562db4 100644 --- 
a/tests/ut/cpp/dataset/bit_functions_test.cc +++ b/tests/ut/cpp/dataset/bit_functions_test.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/core/constants.h" +#include "minddata/dataset/core/constants.h" #include "common/common.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/bounding_box_augment_op_test.cc b/tests/ut/cpp/dataset/bounding_box_augment_op_test.cc new file mode 100644 index 00000000000..dc59d39facf --- /dev/null +++ b/tests/ut/cpp/dataset/bounding_box_augment_op_test.cc @@ -0,0 +1,52 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "common/bboxop_common.h" +#include "minddata/dataset/kernels/image/bounding_box_augment_op.h" +#include "minddata/dataset/kernels/image/random_rotation_op.h" +#include "utils/log_adapter.h" + +using namespace mindspore::dataset; +using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::INFO; + +const bool kSaveExpected = false; +const char kOpName[] = "BoundingBoxAugmentOp"; + +class MindDataTestBoundingBoxAugmentOp : public UT::CVOP::BBOXOP::BBoxOpCommon { + protected: + MindDataTestBoundingBoxAugmentOp() : UT::CVOP::BBOXOP::BBoxOpCommon() {} +}; + +TEST_F(MindDataTestBoundingBoxAugmentOp, TestOp) { + MS_LOG(INFO) << "Doing testBoundingBoxAugment."; + TensorTable results; + std::unique_ptr op = + std::make_unique(std::make_shared(90, 90), 1); + for (const auto &row : images_and_annotations_) { + TensorRow output_row; + Status s = op->Compute(row, &output_row); + EXPECT_TRUE(s.IsOk()); + results.push_back(output_row); + } + if (kSaveExpected) { + SaveImagesWithAnnotations(FileType::kExpected, std::string(kOpName), results); + } + SaveImagesWithAnnotations(FileType::kActual, std::string(kOpName), results); + if (!kSaveExpected) { + CompareActualAndExpected(std::string(kOpName)); + } +} diff --git a/tests/ut/cpp/dataset/btree_test.cc b/tests/ut/cpp/dataset/btree_test.cc index 67b6c4e6c7c..9fa4fce8126 100644 --- a/tests/ut/cpp/dataset/btree_test.cc +++ b/tests/ut/cpp/dataset/btree_test.cc @@ -15,10 +15,10 @@ */ #include -#include "dataset/util/btree.h" -#include "dataset/util/auto_index.h" -#include "dataset/util/system_pool.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/btree.h" +#include "minddata/dataset/util/auto_index.h" +#include "minddata/dataset/util/system_pool.h" +#include "minddata/dataset/util/task_manager.h" #include "common/common.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/c_api_test.cc 
b/tests/ut/cpp/dataset/c_api_test.cc new file mode 100644 index 00000000000..902bc9a43b8 --- /dev/null +++ b/tests/ut/cpp/dataset/c_api_test.cc @@ -0,0 +1,771 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include + +#include "utils/log_adapter.h" +#include "common/utils.h" +#include "common/common.h" +#include "gtest/gtest.h" +#include "securec.h" +#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/include/status.h" +#include "minddata/dataset/include/transforms.h" +#include "minddata/dataset/include/iterator.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/include/samplers.h" + +using namespace mindspore::dataset::api; +using mindspore::MsLogLevel::ERROR; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::LogStream; +using mindspore::dataset::Tensor; +using mindspore::dataset::Status; +using mindspore::dataset::BorderType; + + +class MindDataTestPipeline : public UT::DatasetOpTesting { + protected: +}; + + +TEST_F(MindDataTestPipeline, TestBatchAndRepeat) { + // Create a Mnist Dataset + std::string folder_path = datasets_root_path_ + "/testMnistData/"; + std::shared_ptr ds = Mnist(folder_path, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // 
Create a Batch operation on ds + int32_t batch_size = 2; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 10); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestTensorOpsAndMap) { + // Create a Mnist Dataset + std::string folder_path = datasets_root_path_ + "/testMnistData/"; + std::shared_ptr ds = Mnist(folder_path, RandomSampler(false, 20)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr resize_op = vision::Resize({30, 30}); + EXPECT_TRUE(resize_op != nullptr); + + std::shared_ptr center_crop_op = vision::CenterCrop({16, 16}); + EXPECT_TRUE(center_crop_op != nullptr); + + // Create a Map operation on ds + ds = ds->Map({resize_op, center_crop_op}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 40); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestUniformAugWithOps) { + // Create a Mnist Dataset + std::string folder_path = datasets_root_path_ + "/testMnistData/"; + std::shared_ptr ds = Mnist(folder_path, RandomSampler(false, 20)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 1; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr resize_op = vision::Resize({30, 30}); + EXPECT_TRUE(resize_op != nullptr); + + std::shared_ptr random_crop_op = vision::RandomCrop({28, 28}); + EXPECT_TRUE(random_crop_op != nullptr); + + std::shared_ptr center_crop_op = vision::CenterCrop({16, 16}); + EXPECT_TRUE(center_crop_op != nullptr); + + std::shared_ptr uniform_aug_op = vision::UniformAugment({random_crop_op, center_crop_op}, 2); + EXPECT_TRUE(uniform_aug_op != nullptr); + + // Create a Map operation on ds + ds = ds->Map({resize_op, uniform_aug_op}); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestRandomFlip) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr random_vertical_flip_op = vision::RandomVerticalFlip(0.5); + EXPECT_TRUE(random_vertical_flip_op != nullptr); + + std::shared_ptr random_horizontal_flip_op = vision::RandomHorizontalFlip(0.5); + EXPECT_TRUE(random_horizontal_flip_op != nullptr); + + // Create a Map operation on ds + ds = ds->Map({random_vertical_flip_op, random_horizontal_flip_op}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestImageFolderBatchAndRepeat) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 2; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 10); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) { + std::shared_ptr sampl = DistributedSampler(2, 1); + EXPECT_NE(sampl, nullptr); + + sampl = PKSampler(3); + EXPECT_NE(sampl, nullptr); + + sampl = RandomSampler(false, 12); + EXPECT_NE(sampl, nullptr); + + sampl = SequentialSampler(0, 12); + EXPECT_NE(sampl, nullptr); + + std::vector weights = {0.9, 0.8, 0.68, 0.7, 0.71, 0.6, 0.5, 0.4, 0.3, 0.5, 0.2, 0.1}; + sampl = WeightedRandomSampler(weights, 12); + EXPECT_NE(sampl, nullptr); + + std::vector indices = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23}; + sampl = SubsetRandomSampler(indices); + EXPECT_NE(sampl, nullptr); + + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, false, sampl); + EXPECT_NE(ds, nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_NE(ds, nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 2; + ds = ds->Batch(batch_size); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 12); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestPad) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr pad_op1 = vision::Pad({1, 2, 3, 4}, {0}, BorderType::kSymmetric); + EXPECT_TRUE(pad_op1 != nullptr); + + std::shared_ptr pad_op2 = vision::Pad({1}, {1, 1, 1}, BorderType::kEdge); + EXPECT_TRUE(pad_op2 != nullptr); + + std::shared_ptr pad_op3 = vision::Pad({1, 4}); + EXPECT_TRUE(pad_op3 != nullptr); + + // Create a Map operation on ds + ds = ds->Map({pad_op1, pad_op2, pad_op3}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestCutOut) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr cut_out1 = vision::CutOut(30, 5); + EXPECT_TRUE(cut_out1!= nullptr); + + std::shared_ptr cut_out2 = vision::CutOut(30); + EXPECT_TRUE(cut_out2 != nullptr); + + // Create a Map operation on ds + ds = ds->Map({cut_out1, cut_out2}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestNormalize) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr normalize = vision::Normalize({121.0, 115.0, 100.0}, {70.0, 68.0, 71.0}); + EXPECT_TRUE(normalize != nullptr); + + // Create a Map operation on ds + ds = ds->Map({normalize}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestDecode) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, false, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr decode = vision::Decode(true); + EXPECT_TRUE(decode != nullptr); + + // Create a Map operation on ds + ds = ds->Map({decode}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + EXPECT_EQ(i, 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestShuffleDataset) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Shuffle operation on ds + int32_t shuffle_size = 10; + ds = ds->Shuffle(shuffle_size); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 2; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 10); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestCifar10Dataset) { + + // Create a Cifar10 Dataset + std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; + std::shared_ptr ds = Cifar10(folder_path, 0, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 2; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 10); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestRandomColorAdjust) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr random_color_adjust1 = vision::RandomColorAdjust({1.0}, {0.0}, {0.5}, {0.5}); + EXPECT_TRUE(random_color_adjust1 != nullptr); + + std::shared_ptr random_color_adjust2 = vision::RandomColorAdjust({1.0, 1.0}, {0.0, 0.0}, {0.5, 0.5}, + {0.5, 0.5}); + EXPECT_TRUE(random_color_adjust2 != nullptr); + + std::shared_ptr random_color_adjust3 = vision::RandomColorAdjust({0.5, 1.0}, {0.0, 0.5}, {0.25, 0.5}, + {0.25, 0.5}); + EXPECT_TRUE(random_color_adjust3 != nullptr); + + std::shared_ptr random_color_adjust4 = vision::RandomColorAdjust(); + EXPECT_TRUE(random_color_adjust4 != nullptr); + + // Create a Map operation on ds + ds = ds->Map({random_color_adjust1, random_color_adjust2, random_color_adjust3, random_color_adjust4}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestRandomRotation) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr random_rotation_op = vision::RandomRotation({-180, 180}); + EXPECT_TRUE(random_rotation_op != nullptr); + + // Create a Map operation on ds + ds = ds->Map({random_rotation_op}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestProjectMap) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr random_vertical_flip_op = vision::RandomVerticalFlip(0.5); + EXPECT_TRUE(random_vertical_flip_op != nullptr); + + // Create a Map operation on ds + ds = ds->Map({random_vertical_flip_op}, {}, {}, {"image", "label"}); + EXPECT_TRUE(ds != nullptr); + + // Create a Project operation on ds + std::vector column_project = {"image"}; + ds = ds->Project(column_project); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} \ No newline at end of file diff --git a/tests/ut/cpp/dataset/cache_op_test.cc b/tests/ut/cpp/dataset/cache_op_test.cc new file mode 100644 index 00000000000..bdb7c861b2c --- /dev/null +++ b/tests/ut/cpp/dataset/cache_op_test.cc @@ -0,0 +1,579 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/engine/cache/cache_client.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/datasetops/cache_op.h" +#include "minddata/dataset/engine/datasetops/cache_lookup_op.h" +#include "minddata/dataset/engine/datasetops/cache_merge_op.h" +#include "minddata/dataset/engine/datasetops/source/image_folder_op.h" +#include "common/common.h" +#include "gtest/gtest.h" +#include "utils/log_adapter.h" +#include "minddata/dataset/util/storage_container.h" // lint !e322 +#include "minddata/dataset/engine/datasetops/source/random_data_op.h" +#include "minddata/dataset/engine/data_schema.h" + +using namespace mindspore::dataset; +using mindspore::LogStream; +using mindspore::dataset::CacheClient; +using mindspore::dataset::TaskGroup; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::INFO; + +class MindDataTestCacheOp : public UT::DatasetOpTesting { + public: + void SetUp() override { + DatasetOpTesting::SetUp(); + GlobalInit(); + } +}; + +TEST_F(MindDataTestCacheOp, TestCacheServer) { + Status rc; + CacheClient myClient(1, 0, true); // use arbitrary session of 1, size of 0, spilling is true + // cksum value of 1 for CreateCache here...normally you do not directly create a cache and the cksum arg is generated. + rc = myClient.CreateCache(1, true); + EXPECT_TRUE(rc.IsOk()); + std::cout << myClient << std::endl; + + // Create a schema using the C api's + int32_t rank = 0; // not used + std::unique_ptr testSchema = std::make_unique(); + // 2 columns. 
First column is an "image" 640,480,3 + TensorShape c1Shape({640, 480, 3}); + ColDescriptor c1("image", DataType(DataType::DE_INT8), TensorImpl::kFlexible, + rank, // not used + &c1Shape); + // Column 2 will just be a scalar label number + TensorShape c2Shape({}); // empty shape is a 1-value scalar Tensor + ColDescriptor c2("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, rank, &c2Shape); + + testSchema->AddColumn(c1); + testSchema->AddColumn(c2); + + std::unordered_map map; + rc = testSchema->GetColumnNameMap(&map); + EXPECT_TRUE(rc.IsOk()); + + // Test the CacheSchema api + rc = myClient.CacheSchema(map); + EXPECT_TRUE(rc.IsOk()); + + // Create a tensor, take a snapshot and restore it back, and compare. + std::shared_ptr t = std::make_shared(TensorShape({2, 3}), DataType(DataType::DE_UINT64)); + t->SetItemAt({0, 0}, 1); + t->SetItemAt({0, 1}, 2); + t->SetItemAt({0, 2}, 3); + t->SetItemAt({1, 0}, 4); + t->SetItemAt({1, 1}, 5); + t->SetItemAt({1, 2}, 6); + std::cout << *t << std::endl; + TensorTable tbl; + TensorRow row; + row.push_back(t); + int64_t row_id; + rc = myClient.WriteRow(row, &row_id); + EXPECT_TRUE(rc.IsOk()); + + // Switch off build phase. + rc = myClient.BuildPhaseDone(); + EXPECT_TRUE(rc.IsOk()); + + // Now restore from cache. 
+ row.clear(); + rc = myClient.GetRows({row_id}, &tbl); + row = tbl.front(); + EXPECT_TRUE(rc.IsOk()); + auto r = row.front(); + std::cout << *r << std::endl; + // Compare + bool cmp = (*t == *r); + EXPECT_TRUE(cmp); + + // Get back the schema and verify + std::unordered_map map_out; + rc = myClient.FetchSchema(&map_out); + EXPECT_TRUE(rc.IsOk()); + cmp = (map_out == map); + EXPECT_TRUE(cmp); + + // Test Purge and Destroy + rc = myClient.PurgeCache(); + EXPECT_TRUE(rc.IsOk()); + rc = myClient.DestroyCache(); + EXPECT_TRUE(rc.IsOk()); +} + +TEST_F(MindDataTestCacheOp, TestConcurrencyRequest) { + // Clear the rc of the master thread if any + (void)TaskManager::GetMasterThreadRc(); + TaskGroup vg; + Status rc; + CacheClient myClient(1, 1, true); // use arbitrary session of 1, size 1, spilling is true + // cksum value of 1 for CreateCache here...normally you do not directly create a cache and the cksum arg is generated. + rc = myClient.CreateCache(1, true); + EXPECT_TRUE(rc.IsOk()); + std::cout << myClient << std::endl; + std::shared_ptr t = std::make_shared(TensorShape({2, 3}), DataType(DataType::DE_UINT64)); + t->SetItemAt({0, 0}, 1); + t->SetItemAt({0, 1}, 2); + t->SetItemAt({0, 2}, 3); + t->SetItemAt({1, 0}, 4); + t->SetItemAt({1, 1}, 5); + t->SetItemAt({1, 2}, 6); + TensorTable tbl; + TensorRow row; + row.push_back(t); + // Cache tensor row t 5000 times using 10 threads. + for (auto k = 0; k < 10; ++k) { + Status vg_rc = vg.CreateAsyncTask("Test agent", [&myClient, &row]() -> Status { + TaskManager::FindMe()->Post(); + for (auto i = 0; i < 500; i++) { + RETURN_IF_NOT_OK(myClient.WriteRow(row)); + } + return Status::OK(); + }); + EXPECT_TRUE(vg_rc.IsOk()); + } + ASSERT_TRUE(vg.join_all().IsOk()); + ASSERT_TRUE(vg.GetTaskErrorIfAny().IsOk()); + rc = myClient.BuildPhaseDone(); + ASSERT_TRUE(rc.IsOk()); + // Get statistics from the server. 
+ CacheClient::ServiceStat stat{}; + rc = myClient.GetStat(&stat); + ASSERT_TRUE(rc.IsOk()); + std::cout << stat.min_row_id << ":" << stat.max_row_id << ":" << stat.num_mem_cached << ":" << stat.num_disk_cached + << "\n"; + // Expect there are 5000 rows there. + EXPECT_EQ(5000, stat.max_row_id - stat.min_row_id + 1); + // Get them all back using row id and compare with tensor t. + for (auto i = stat.min_row_id; i <= stat.max_row_id; ++i) { + tbl.clear(); + row.clear(); + rc = myClient.GetRows({i}, &tbl); + EXPECT_TRUE(rc.IsOk()); + row = tbl.front(); + auto r = row.front(); + bool cmp = (*t == *r); + EXPECT_TRUE(cmp); + } + rc = myClient.DestroyCache(); + EXPECT_TRUE(rc.IsOk()); +} + +// Simple test with a repeated cache op over random data producer +// +// RepeatOp +// | +// CacheOp +// | +// RandomDataOp +// +TEST_F(MindDataTestCacheOp, TestRandomDataCache1) { + Status rc; + int32_t rank = 0; // not used + MS_LOG(INFO) << "UT test TestRandomDataCache1"; + // Start with an empty execution tree + auto myTree = std::make_shared(); + + // Create a schema using the C api's + std::unique_ptr testSchema = std::make_unique(); + + // 2 columns. 
First column is an "image" 640,480,3 + TensorShape c1Shape({640, 480, 3}); + ColDescriptor c1("image", DataType(DataType::DE_INT8), TensorImpl::kFlexible, + rank, // not used + &c1Shape); + + // Column 2 will just be a scalar label number + TensorShape c2Shape({}); // empty shape is a 1-value scalar Tensor + ColDescriptor c2("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, rank, &c2Shape); + + testSchema->AddColumn(c1); + testSchema->AddColumn(c2); + + // RandomDataOp + std::shared_ptr myRandomDataOp; + rc = RandomDataOp::Builder() + .SetRowsPerBuffer(4) + .SetNumWorkers(4) + .SetDataSchema(std::move(testSchema)) + .SetTotalRows(50) // 50 samples for now + .Build(&myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + // CacheOp + // size of 0, spilling is true + std::shared_ptr myClient = std::make_shared(1, 0, true); + std::shared_ptr myCacheOp; + + int64_t num_samples = 0; + int64_t start_index = 0; + auto seq_sampler = std::make_shared(num_samples, start_index); + rc = CacheOp::Builder() + .SetNumWorkers(5) + .SetClient(myClient) + .SetRowsPerBuffer(4) + .SetSampler(std::move(seq_sampler)) + .Build(&myCacheOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myCacheOp); + EXPECT_TRUE(rc.IsOk()); + + // RepeatOp + uint32_t numRepeats = 4; + std::shared_ptr myRepeatOp; + rc = RepeatOp::Builder(numRepeats).Build(&myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + + // Assign tree relations and root + rc = myRepeatOp->AddChild(myCacheOp); + EXPECT_TRUE(rc.IsOk()); + rc = myCacheOp->AddChild(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssignRoot(myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + + MS_LOG(INFO) << "Launching tree and begin iteration"; + rc = myTree->Prepare(); + EXPECT_TRUE(rc.IsOk()); + + // quick check to see what tree looks like + std::ostringstream ss; + ss << *myTree; // some funny const error if I 
try to write directly to ms log stream + MS_LOG(INFO) << "Here's the tree:\n" << ss.str(); + + std::cout << *myClient << std::endl; + + rc = myTree->Launch(); + EXPECT_TRUE(rc.IsOk()); + + // Start the loop of reading tensors from our pipeline + DatasetIterator dI(myTree); + TensorRow tensorList; + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + int rowCount = 0; + while (!tensorList.empty()) { + // Don't display these rows, just count them + MS_LOG(INFO) << "Row fetched #: " << rowCount; + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + rowCount++; + } + ASSERT_EQ(rowCount, 200); + rc = myClient->DestroyCache(); + EXPECT_TRUE(rc.IsOk()); +} + +//// Simple test with a repeated cache op over random data producer. +//// This one will exceed memory and require a spill. +//// +//// RepeatOp +//// | +//// CacheOp +//// | +//// RandomDataOp +//// +TEST_F(MindDataTestCacheOp, TestRandomDataCacheSpill) { + Status rc; + int32_t rank = 0; // not used + MS_LOG(INFO) << "UT test TestRandomDataCacheSpill"; + // Start with an empty execution tree + auto myTree = std::make_shared(); + + // Create a schema using the C api's + std::unique_ptr testSchema = std::make_unique(); + + // 2 columns. 
First column is an "image" 640,480,3 + TensorShape c1Shape({640, 480, 3}); + ColDescriptor c1("image", DataType(DataType::DE_INT8), TensorImpl::kFlexible, + rank, // not used + &c1Shape); + + // Column 2 will just be a scalar label number + TensorShape c2Shape({}); // empty shape is a 1-value scalar Tensor + ColDescriptor c2("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, rank, &c2Shape); + + testSchema->AddColumn(c1); + testSchema->AddColumn(c2); + + // RandomDataOp + std::shared_ptr myRandomDataOp; + rc = RandomDataOp::Builder() + .SetRowsPerBuffer(2) + .SetNumWorkers(4) + .SetDataSchema(std::move(testSchema)) + .SetTotalRows(10) + .Build(&myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + // CacheOp + int64_t num_samples = 0; + int64_t start_index = 0; + auto seq_sampler = std::make_shared(num_samples, start_index); + std::shared_ptr myClient = std::make_shared(1, 4, true); + std::shared_ptr myCacheOp; + rc = CacheOp::Builder() + .SetNumWorkers(4) + .SetClient(myClient) + .SetRowsPerBuffer(3) + .SetSampler(std::move(seq_sampler)) + .Build(&myCacheOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myCacheOp); + EXPECT_TRUE(rc.IsOk()); + + // RepeatOp + uint32_t numRepeats = 4; + std::shared_ptr myRepeatOp; + rc = RepeatOp::Builder(numRepeats).Build(&myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + + // Assign tree relations and root + rc = myRepeatOp->AddChild(myCacheOp); + EXPECT_TRUE(rc.IsOk()); + rc = myCacheOp->AddChild(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssignRoot(myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + + MS_LOG(INFO) << "Launching tree and begin iteration"; + rc = myTree->Prepare(); + EXPECT_TRUE(rc.IsOk()); + + std::cout << *myClient << std::endl; + + rc = myTree->Launch(); + EXPECT_TRUE(rc.IsOk()); + + // Start the loop of reading tensors from our pipeline + DatasetIterator 
dI(myTree); + TensorRow tensorList; + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + int rowCount = 0; + while (!tensorList.empty()) { + // Don't display these rows, just count them + MS_LOG(INFO) << "Row fetched #: " << rowCount; + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + rowCount++; + } + ASSERT_EQ(rowCount, 40); + rc = myClient->DestroyCache(); + EXPECT_TRUE(rc.IsOk()); +} + +TEST_F(MindDataTestCacheOp, TestImageFolderCacheMerge) { + Status rc; + int64_t num_samples = 0; + int64_t start_index = 0; + auto seq_sampler = std::make_shared(num_samples, start_index); + + std::shared_ptr myClient = std::make_shared(1, 0, true); + + std::shared_ptr myMergeOp; + rc = CacheMergeOp::Builder().SetNumWorkers(3).SetOpConnectorSize(3).SetNumCleaner(2).SetClient(myClient).Build( + &myMergeOp); + EXPECT_TRUE(rc.IsOk()); + + std::shared_ptr myLookupOp; + rc = CacheLookupOp::Builder() + .SetNumWorkers(3) + .SetOpConnectorSize(3) + .SetClient(myClient) + .SetSampler(seq_sampler) + .Build(&myLookupOp); + EXPECT_TRUE(rc.IsOk()); + + std::shared_ptr so; + ImageFolderOp::Builder builder; + builder.SetSampler(myLookupOp) + .SetOpConnectorSize(3) + .SetNumWorkers(3) + .SetRowsPerBuffer(2) + .SetExtensions({".jpg", ".JPEG"}) + .SetRecursive(true) + .SetImageFolderDir(datasets_root_path_ + "/testPK/data"); + rc = builder.Build(&so); + EXPECT_TRUE(rc.IsOk()); + + // RepeatOp + uint32_t numRepeats = 4; + std::shared_ptr myRepeatOp; + rc = RepeatOp::Builder(numRepeats).Build(&myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + + auto myTree = std::make_shared(); + rc = myTree->AssociateNode(so); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myLookupOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myMergeOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssignRoot(myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + + rc = myRepeatOp->AddChild(myMergeOp); + EXPECT_TRUE(rc.IsOk()); + 
rc = myMergeOp->AddChild(myLookupOp); + EXPECT_TRUE(rc.IsOk()); + rc = myMergeOp->AddChild(so); + EXPECT_TRUE(rc.IsOk()); + + rc = myTree->Prepare(); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->Launch(); + EXPECT_TRUE(rc.IsOk()); + // Start the loop of reading tensors from our pipeline + DatasetIterator dI(myTree); + TensorRow tensorList; + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + int rowCount = 0; + while (!tensorList.empty()) { + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + if (rc.IsError()) { + std::cout << rc << std::endl; + break; + } + rowCount++; + } + ASSERT_EQ(rowCount, 176); + std::cout << "Row count : " << rowCount << std::endl; + rc = myClient->DestroyCache(); + EXPECT_TRUE(rc.IsOk()); +} + +//// Simple test with a repeated cache op over random data producer. +//// The difference in this one is that you do not add the sampler to the cache op directly. +//// Instead, the sampler is added as part of the leaf op construction. Then, the prepare +//// phase will pull this up from the leaf and into the cache. +//// It removes the sampler from the leaf op, which doesn't make sense there anyway for +//// the RandomDataOp which doesn't support sampling without a cache. +//// +//// RepeatOp +//// | +//// CacheOp +//// | +//// RandomDataOp +//// +TEST_F(MindDataTestCacheOp, TestCacheInheritSampler) { + Status rc; + int32_t rank = 0; // not used + MS_LOG(INFO) << "UT test TestCacheInheritSampler"; + + int64_t num_samples = 0; + int64_t start_index = 0; + auto seq_sampler = std::make_shared(num_samples, start_index); + + // Start with an empty execution tree + auto myTree = std::make_shared(); + + // Create a schema using the C api's + std::unique_ptr testSchema = std::make_unique(); + + // 2 columns. 
First column is an "image" 640,480,3 + TensorShape c1Shape({640, 480, 3}); + ColDescriptor c1("image", DataType(DataType::DE_INT8), TensorImpl::kFlexible, + rank, // not used + &c1Shape); + + // Column 2 will just be a scalar label number + TensorShape c2Shape({}); // empty shape is a 1-value scalar Tensor + ColDescriptor c2("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, rank, &c2Shape); + + testSchema->AddColumn(c1); + testSchema->AddColumn(c2); + + // RandomDataOp + std::shared_ptr myRandomDataOp; + rc = RandomDataOp::Builder() + .SetRowsPerBuffer(2) + .SetNumWorkers(4) + .SetDataSchema(std::move(testSchema)) + .SetTotalRows(10) + .SetSampler(std::move(seq_sampler)) + .Build(&myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + + // CacheOp + std::shared_ptr myClient = std::make_shared(1, 4, true); + std::shared_ptr myCacheOp; + rc = CacheOp::Builder().SetNumWorkers(4).SetClient(myClient).SetRowsPerBuffer(3).Build(&myCacheOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myCacheOp); + EXPECT_TRUE(rc.IsOk()); + + // RepeatOp + uint32_t numRepeats = 4; + std::shared_ptr myRepeatOp; + rc = RepeatOp::Builder(numRepeats).Build(&myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssociateNode(myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + + // Assign tree relations and root + rc = myRepeatOp->AddChild(myCacheOp); + EXPECT_TRUE(rc.IsOk()); + rc = myCacheOp->AddChild(myRandomDataOp); + EXPECT_TRUE(rc.IsOk()); + rc = myTree->AssignRoot(myRepeatOp); + EXPECT_TRUE(rc.IsOk()); + + MS_LOG(INFO) << "Launching tree and begin iteration"; + rc = myTree->Prepare(); + EXPECT_TRUE(rc.IsOk()); + + std::cout << *myClient << std::endl; + + rc = myTree->Launch(); + EXPECT_TRUE(rc.IsOk()); + + // Start the loop of reading tensors from our pipeline + DatasetIterator dI(myTree); + TensorRow tensorList; + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + int rowCount = 0; + while 
(!tensorList.empty()) { + // Don't display these rows, just count them + MS_LOG(INFO) << "Row fetched #: " << rowCount; + rc = dI.FetchNextTensorRow(&tensorList); + EXPECT_TRUE(rc.IsOk()); + rowCount++; + } + ASSERT_EQ(rowCount, 40); + rc = myClient->DestroyCache(); + EXPECT_TRUE(rc.IsOk()); +} diff --git a/tests/ut/cpp/dataset/celeba_op_test.cc b/tests/ut/cpp/dataset/celeba_op_test.cc index a109739fdae..ccaed122f4d 100644 --- a/tests/ut/cpp/dataset/celeba_op_test.cc +++ b/tests/ut/cpp/dataset/celeba_op_test.cc @@ -19,11 +19,11 @@ #include #include "common/common.h" -#include "dataset/core/client.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/datasetops/source/celeba_op.h" -#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/datasetops/source/celeba_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/util/status.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" #include "securec.h" diff --git a/tests/ut/cpp/dataset/center_crop_op_test.cc b/tests/ut/cpp/dataset/center_crop_op_test.cc index 54c45c957e0..cd0f362f643 100644 --- a/tests/ut/cpp/dataset/center_crop_op_test.cc +++ b/tests/ut/cpp/dataset/center_crop_op_test.cc @@ -15,8 +15,8 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/center_crop_op.h" -#include "dataset/core/cv_tensor.h" +#include "minddata/dataset/kernels/image/center_crop_op.h" +#include "minddata/dataset/core/cv_tensor.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/channel_swap_test.cc b/tests/ut/cpp/dataset/channel_swap_test.cc index f1dc1396caf..2000de15b2d 100644 --- a/tests/ut/cpp/dataset/channel_swap_test.cc +++ b/tests/ut/cpp/dataset/channel_swap_test.cc 
@@ -15,8 +15,8 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/hwc_to_chw_op.h" -#include "dataset/core/data_type.h" +#include "minddata/dataset/kernels/image/hwc_to_chw_op.h" +#include "minddata/dataset/core/data_type.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/cifar_op_test.cc b/tests/ut/cpp/dataset/cifar_op_test.cc index b37b9acaee8..ed22f4f3470 100644 --- a/tests/ut/cpp/dataset/cifar_op_test.cc +++ b/tests/ut/cpp/dataset/cifar_op_test.cc @@ -20,14 +20,14 @@ #include "common/common.h" #include "common/utils.h" -#include "dataset/core/client.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/datasetops/source/cifar_op.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/engine/datasetops/source/sampler/random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" -#include "dataset/util/path.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/datasetops/source/cifar_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/status.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" #include "securec.h" diff --git a/tests/ut/cpp/dataset/circular_pool_test.cc b/tests/ut/cpp/dataset/circular_pool_test.cc index c42b08ddcd5..d06f8466841 100644 --- a/tests/ut/cpp/dataset/circular_pool_test.cc +++ b/tests/ut/cpp/dataset/circular_pool_test.cc @@ -15,9 +15,9 @@ */ #include #include -#include "dataset/util/task_manager.h" -#include "dataset/util/circular_pool.h" -#include "dataset/util/services.h" +#include 
"minddata/dataset/util/task_manager.h" +#include "minddata/dataset/util/circular_pool.h" +#include "minddata/dataset/util/services.h" #include "common/common.h" #include "common/utils.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/client_config_test.cc b/tests/ut/cpp/dataset/client_config_test.cc index a907d501344..5cc9600b4e0 100644 --- a/tests/ut/cpp/dataset/client_config_test.cc +++ b/tests/ut/cpp/dataset/client_config_test.cc @@ -20,11 +20,11 @@ #include #include #include -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "gtest/gtest.h" -#include "dataset/core/global_context.h" -#include "dataset/util/status.h" -#include "dataset/core/client.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/clue_op_test.cc b/tests/ut/cpp/dataset/clue_op_test.cc index ff2f01a9ffb..0935434a06c 100644 --- a/tests/ut/cpp/dataset/clue_op_test.cc +++ b/tests/ut/cpp/dataset/clue_op_test.cc @@ -17,13 +17,13 @@ #include #include -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "common/utils.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "dataset/engine/datasetops/source/clue_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/engine/datasetops/source/clue_op.h" +#include "minddata/dataset/util/status.h" namespace common = mindspore::common; diff --git a/tests/ut/cpp/dataset/coco_op_test.cc b/tests/ut/cpp/dataset/coco_op_test.cc index bcb82f8ec11..6e6d3c26e51 100644 --- a/tests/ut/cpp/dataset/coco_op_test.cc +++ b/tests/ut/cpp/dataset/coco_op_test.cc @@ -20,18 +20,18 @@ #include "common/common.h" #include "common/utils.h" -#include "dataset/core/client.h" -#include "dataset/core/global_context.h" -#include 
"dataset/engine/datasetops/source/coco_op.h" -#include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" -#include "dataset/engine/datasetops/source/sampler/pk_sampler.h" -#include "dataset/engine/datasetops/source/sampler/random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" -#include "dataset/util/path.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/datasetops/source/coco_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/status.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" #include "securec.h" diff --git a/tests/ut/cpp/dataset/common/bboxop_common.cc b/tests/ut/cpp/dataset/common/bboxop_common.cc index 70e6b5a339e..62c9f853488 100644 --- a/tests/ut/cpp/dataset/common/bboxop_common.cc +++ b/tests/ut/cpp/dataset/common/bboxop_common.cc @@ -26,9 +26,9 @@ #include "./tinyxml2.h" #include "opencv2/opencv.hpp" #include "common/utils.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/util/path.h" -#include "dataset/core/constants.h" +#include "minddata/dataset/core/cv_tensor.h" 
+#include "minddata/dataset/util/path.h" +#include "minddata/dataset/core/constants.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; @@ -66,17 +66,16 @@ void BBoxOpCommon::GetInputImagesAndAnnotations(const std::string &dir, std::siz MS_LOG(ERROR) << "Images folder was not found : " + images_path; EXPECT_TRUE(dir_path.Exists()); } - std::size_t files_fetched = 0; // get image file paths - while (image_dir_itr->hasNext() && files_fetched < num_of_samples) { + while (image_dir_itr->hasNext()) { Path image_path = image_dir_itr->next(); if (image_path.Extension() == std::string(kImageExt)) { paths_to_fetch.push_back(image_path.toString()); - files_fetched++; } } // sort fetched files std::sort(paths_to_fetch.begin(), paths_to_fetch.end()); + std::size_t files_fetched = 0; for (const auto &image_file : paths_to_fetch) { std::string image_ext = std::string(kImageExt); std::string annot_file = image_file; @@ -100,6 +99,10 @@ void BBoxOpCommon::GetInputImagesAndAnnotations(const std::string &dir, std::siz // add image and annotation to the tensor table TensorRow row_data({std::move(input_tensor_), std::move(annotation_tensor)}); images_and_annotations_.push_back(row_data); + files_fetched++; + if (files_fetched == num_of_samples) { + break; + } } } @@ -118,14 +121,11 @@ void BBoxOpCommon::SaveImagesWithAnnotations(BBoxOpCommon::FileType type, const bool passing_data_fetch = true; // For each bounding box draw on the image. 
for (uint32_t i = 0; i < num_of_boxes; i++) { - uint32_t x = 0; - uint32_t y = 0; - uint32_t w = 0; - uint32_t h = 0; - passing_data_fetch &= row[1]->GetUnsignedIntAt(&x, {i, 0}).IsOk(); - passing_data_fetch &= row[1]->GetUnsignedIntAt(&y, {i, 1}).IsOk(); - passing_data_fetch &= row[1]->GetUnsignedIntAt(&w, {i, 2}).IsOk(); - passing_data_fetch &= row[1]->GetUnsignedIntAt(&h, {i, 3}).IsOk(); + float x = 0.0, y = 0.0, w = 0.0, h = 0.0; + passing_data_fetch &= row[1]->GetItemAt(&x, {i, 0}).IsOk(); + passing_data_fetch &= row[1]->GetItemAt(&y, {i, 1}).IsOk(); + passing_data_fetch &= row[1]->GetItemAt(&w, {i, 2}).IsOk(); + passing_data_fetch &= row[1]->GetItemAt(&h, {i, 3}).IsOk(); if (!passing_data_fetch) { MS_LOG(ERROR) << "Fetching bbox coordinates failed in SaveImagesWithAnnotations."; EXPECT_TRUE(passing_data_fetch); @@ -193,24 +193,24 @@ bool BBoxOpCommon::LoadAnnotationFile(const std::string &path, std::shared_ptr return_value_list; + std::vector return_value_list; dsize_t bbox_count = 0; // keep track of number of bboxes in file dsize_t bbox_val_count = 4; // creating bboxes of size 4 to test function // FILE OK TO READ while (object != nullptr) { bbox_count += 1; std::string label_name; - uint32_t xmin = 0, ymin = 0, xmax = 0, ymax = 0; + float xmin = 0.0, ymin = 0.0, xmax = 0.0, ymax = 0.0; XMLElement *bbox_node = object->FirstChildElement("bndbox"); if (bbox_node != nullptr) { XMLElement *xmin_node = bbox_node->FirstChildElement("xmin"); - if (xmin_node != nullptr) xmin = xmin_node->UnsignedText(); + if (xmin_node != nullptr) xmin = xmin_node->FloatText(); XMLElement *ymin_node = bbox_node->FirstChildElement("ymin"); - if (ymin_node != nullptr) ymin = ymin_node->UnsignedText(); + if (ymin_node != nullptr) ymin = ymin_node->FloatText(); XMLElement *xmax_node = bbox_node->FirstChildElement("xmax"); - if (xmax_node != nullptr) xmax = xmax_node->UnsignedText(); + if (xmax_node != nullptr) xmax = xmax_node->FloatText(); XMLElement *ymax_node = 
bbox_node->FirstChildElement("ymax"); - if (ymax_node != nullptr) ymax = ymax_node->UnsignedText(); + if (ymax_node != nullptr) ymax = ymax_node->FloatText(); } else { MS_LOG(ERROR) << "bndbox dismatch in " + path; return false; diff --git a/tests/ut/cpp/dataset/common/bboxop_common.h b/tests/ut/cpp/dataset/common/bboxop_common.h index ba3ceb62d94..243908e7a34 100644 --- a/tests/ut/cpp/dataset/common/bboxop_common.h +++ b/tests/ut/cpp/dataset/common/bboxop_common.h @@ -17,7 +17,7 @@ #define TESTS_DATASET_UT_CORE_COMMON_DE_UT_BBOXOP_COMMON_H_ #include "cvop_common.h" -#include "dataset/util/path.h" +#include "minddata/dataset/util/path.h" namespace UT { namespace CVOP { diff --git a/tests/ut/cpp/dataset/common/cvop_common.cc b/tests/ut/cpp/dataset/common/cvop_common.cc index 6f66229e80d..48d69564fd3 100644 --- a/tests/ut/cpp/dataset/common/cvop_common.cc +++ b/tests/ut/cpp/dataset/common/cvop_common.cc @@ -18,9 +18,9 @@ #include #include #include "cvop_common.h" -#include "dataset/core/constants.h" +#include "minddata/dataset/core/constants.h" #include "common/utils.h" -#include "dataset/core/cv_tensor.h" +#include "minddata/dataset/core/cv_tensor.h" #include "utils/log_adapter.h" #include #include diff --git a/tests/ut/cpp/dataset/common/cvop_common.h b/tests/ut/cpp/dataset/common/cvop_common.h index 02c079fd68e..59134091fdf 100644 --- a/tests/ut/cpp/dataset/common/cvop_common.h +++ b/tests/ut/cpp/dataset/common/cvop_common.h @@ -19,7 +19,7 @@ #include #include #include "common.h" -#include "dataset/kernels/image/image_utils.h" +#include "minddata/dataset/kernels/image/image_utils.h" namespace UT { namespace CVOP { diff --git a/tests/ut/cpp/dataset/concat_op_test.cc b/tests/ut/cpp/dataset/concat_op_test.cc index 70d0268ec75..9e991ce0d38 100644 --- a/tests/ut/cpp/dataset/concat_op_test.cc +++ b/tests/ut/cpp/dataset/concat_op_test.cc @@ -19,7 +19,7 @@ #include "common/common.h" #include "common/utils.h" -#include "dataset/core/client.h" +#include 
"minddata/dataset/core/client.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/concatenate_op_test.cc b/tests/ut/cpp/dataset/concatenate_op_test.cc index 1ceedbac38f..dc2fc69266c 100644 --- a/tests/ut/cpp/dataset/concatenate_op_test.cc +++ b/tests/ut/cpp/dataset/concatenate_op_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ #include "common/common.h" -#include "dataset/kernels/data/concatenate_op.h" +#include "minddata/dataset/kernels/data/concatenate_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/connector_test.cc b/tests/ut/cpp/dataset/connector_test.cc index 7ee36cc2c0d..0fc5b100d73 100644 --- a/tests/ut/cpp/dataset/connector_test.cc +++ b/tests/ut/cpp/dataset/connector_test.cc @@ -23,8 +23,8 @@ #include "common/common.h" -#include "dataset/engine/connector.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/engine/connector.h" +#include "minddata/dataset/util/task_manager.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/cut_out_op_test.cc b/tests/ut/cpp/dataset/cut_out_op_test.cc index 462fb3a875b..5d24d9c3f96 100644 --- a/tests/ut/cpp/dataset/cut_out_op_test.cc +++ b/tests/ut/cpp/dataset/cut_out_op_test.cc @@ -15,7 +15,7 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/cut_out_op.h" +#include "minddata/dataset/kernels/image/cut_out_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/cyclic_array_test.cc b/tests/ut/cpp/dataset/cyclic_array_test.cc index 55f75c403f4..380436de1b1 100644 --- a/tests/ut/cpp/dataset/cyclic_array_test.cc +++ b/tests/ut/cpp/dataset/cyclic_array_test.cc @@ -19,7 +19,7 @@ #include "common/cvop_common.h" #include "gtest/gtest.h" #include "securec.h" -#include "dataset/engine/perf/cyclic_array.h" +#include 
"minddata/dataset/engine/perf/cyclic_array.h" #include using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/datatype_test.cc b/tests/ut/cpp/dataset/datatype_test.cc index a55853c4c59..b81618dc247 100644 --- a/tests/ut/cpp/dataset/datatype_test.cc +++ b/tests/ut/cpp/dataset/datatype_test.cc @@ -15,16 +15,14 @@ */ #include #include "./securec.h" -#include "dataset/core/data_type.h" +#include "minddata/dataset/core/data_type.h" #include "common/common.h" #include "gtest/gtest.h" #include -#include "dataset/core/constants.h" +#include "minddata/dataset/core/constants.h" using namespace mindspore::dataset; -namespace py = pybind11; - class MindDataTestDatatype : public UT::Common { public: MindDataTestDatatype() = default; diff --git a/tests/ut/cpp/dataset/decode_op_test.cc b/tests/ut/cpp/dataset/decode_op_test.cc index 7f3e129ac0d..1cd03099ce3 100644 --- a/tests/ut/cpp/dataset/decode_op_test.cc +++ b/tests/ut/cpp/dataset/decode_op_test.cc @@ -16,7 +16,7 @@ #include #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/decode_op.h" +#include "minddata/dataset/kernels/image/decode_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/duplicate_op_test.cc b/tests/ut/cpp/dataset/duplicate_op_test.cc index b7ce32f6552..93779b084d6 100644 --- a/tests/ut/cpp/dataset/duplicate_op_test.cc +++ b/tests/ut/cpp/dataset/duplicate_op_test.cc @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "gtest/gtest.h" -#include "dataset/core/tensor.h" -#include "dataset/kernels/data/duplicate_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/data/duplicate_op.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/execution_tree_test.cc b/tests/ut/cpp/dataset/execution_tree_test.cc index 529644331a5..b871dd00d80 100644 --- a/tests/ut/cpp/dataset/execution_tree_test.cc +++ b/tests/ut/cpp/dataset/execution_tree_test.cc @@ -14,11 +14,11 @@ * limitations under the License. */ #include -#include "dataset/util/circular_pool.h" -#include "dataset/core/client.h" -#include "dataset/engine/execution_tree.h" -#include "dataset/engine/datasetops/shuffle_op.h" -#include "dataset/engine/datasetops/source/tf_reader_op.h" +#include "minddata/dataset/util/circular_pool.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/engine/execution_tree.h" +#include "minddata/dataset/engine/datasetops/shuffle_op.h" +#include "minddata/dataset/engine/datasetops/source/tf_reader_op.h" #include "common/common.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/fill_op_test.cc b/tests/ut/cpp/dataset/fill_op_test.cc index d43b7d75489..20e323cc8d8 100644 --- a/tests/ut/cpp/dataset/fill_op_test.cc +++ b/tests/ut/cpp/dataset/fill_op_test.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ #include "common/common.h" -#include "dataset/kernels/data/fill_op.h" +#include "minddata/dataset/kernels/data/fill_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/filter_op_test.cc b/tests/ut/cpp/dataset/filter_op_test.cc index 45ee714337e..3e5be8dc047 100644 --- a/tests/ut/cpp/dataset/filter_op_test.cc +++ b/tests/ut/cpp/dataset/filter_op_test.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/util/circular_pool.h" -#include "dataset/core/client.h" +#include "minddata/dataset/util/circular_pool.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/global_context_test.cc b/tests/ut/cpp/dataset/global_context_test.cc index bb75d941aa1..cd4c970ae67 100644 --- a/tests/ut/cpp/dataset/global_context_test.cc +++ b/tests/ut/cpp/dataset/global_context_test.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/core/global_context.h" +#include "minddata/dataset/core/global_context.h" #include "common/common.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/gnn_graph_test.cc b/tests/ut/cpp/dataset/gnn_graph_test.cc index dc74e66b0c0..c4dd7b055c5 100644 --- a/tests/ut/cpp/dataset/gnn_graph_test.cc +++ b/tests/ut/cpp/dataset/gnn_graph_test.cc @@ -20,9 +20,9 @@ #include "common/common.h" #include "gtest/gtest.h" -#include "dataset/util/status.h" -#include "dataset/engine/gnn/node.h" -#include "dataset/engine/gnn/graph_loader.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/engine/gnn/node.h" +#include "minddata/dataset/engine/gnn/graph_loader.h" using namespace mindspore::dataset; using namespace mindspore::dataset::gnn; @@ -49,9 +49,10 @@ TEST_F(MindDataTestGNNGraph, TestGraphLoader) { EdgeTypeMap e_type_map; NodeFeatureMap n_feature_map; EdgeFeatureMap e_feature_map; - DefaultFeatureMap default_feature_map; + DefaultNodeFeatureMap default_node_feature_map; + DefaultEdgeFeatureMap default_edge_feature_map; EXPECT_TRUE(gl.GetNodesAndEdges(&n_id_map, &e_id_map, &n_type_map, &e_type_map, &n_feature_map, &e_feature_map, - &default_feature_map) + &default_node_feature_map, &default_edge_feature_map) .IsOk()); EXPECT_EQ(n_id_map.size(), 20); EXPECT_EQ(e_id_map.size(), 40); @@ -119,6 +120,17 @@ TEST_F(MindDataTestGNNGraph, TestGetSampledNeighbors) { std::transform(edges->begin(), edges->end(), edge_list.begin(), [](const EdgeIdType edge) { return edge; }); + TensorRow edge_features; + s = graph.GetEdgeFeature(edges, meta_info.edge_feature_type, &edge_features); + EXPECT_TRUE(s.IsOk()); + EXPECT_TRUE(edge_features[0]->ToString() == + "Tensor (shape: <40>, Type: int32)\n" + "[0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0]"); + EXPECT_TRUE(edge_features[1]->ToString() == + "Tensor (shape: <40>, Type: float32)\n" + 
"[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,2,2.1,2.2,2.3,2.4,2.5,2.6,2." + "7,2.8,2.9,3,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8,3.9,4]"); + std::shared_ptr nodes; s = graph.GetNodesFromEdges(edge_list, &nodes); EXPECT_TRUE(s.IsOk()); @@ -247,4 +259,30 @@ TEST_F(MindDataTestGNNGraph, TestRandomWalk) { s = graph.RandomWalk(node_list, meta_path, 2.0, 0.5, -1, &walk_path); EXPECT_TRUE(s.IsOk()); EXPECT_TRUE(walk_path->shape().ToString() == "<33,60>"); -} \ No newline at end of file +} + +TEST_F(MindDataTestGNNGraph, TestRandomWalkDefaults) { + std::string path = "data/mindrecord/testGraphData/sns"; + Graph graph(path, 1); + Status s = graph.Init(); + EXPECT_TRUE(s.IsOk()); + + MetaInfo meta_info; + s = graph.GetMetaInfo(&meta_info); + EXPECT_TRUE(s.IsOk()); + + std::shared_ptr nodes; + s = graph.GetAllNodes(meta_info.node_type[0], &nodes); + EXPECT_TRUE(s.IsOk()); + std::vector node_list; + for (auto itr = nodes->begin(); itr != nodes->end(); ++itr) { + node_list.push_back(*itr); + } + + print_int_vec(node_list, "node list "); + std::vector meta_path(59, 1); + std::shared_ptr walk_path; + s = graph.RandomWalk(node_list, meta_path, 1.0, 1.0, -1, &walk_path); + EXPECT_TRUE(s.IsOk()); + EXPECT_TRUE(walk_path->shape().ToString() == "<33,60>"); +} diff --git a/tests/ut/cpp/dataset/image_folder_op_test.cc b/tests/ut/cpp/dataset/image_folder_op_test.cc index 576c5abbfcb..3168efa1965 100644 --- a/tests/ut/cpp/dataset/image_folder_op_test.cc +++ b/tests/ut/cpp/dataset/image_folder_op_test.cc @@ -19,18 +19,18 @@ #include #include "common/common.h" #include "common/utils.h" -#include "dataset/core/client.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/datasetops/source/image_folder_op.h" -#include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" -#include "dataset/engine/datasetops/source/sampler/pk_sampler.h" -#include "dataset/engine/datasetops/source/sampler/random_sampler.h" -#include 
"dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" -#include "dataset/util/path.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/datasetops/source/image_folder_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/status.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" #include "securec.h" diff --git a/tests/ut/cpp/dataset/interrupt_test.cc b/tests/ut/cpp/dataset/interrupt_test.cc index 7ab608b9ae7..8a064131751 100644 --- a/tests/ut/cpp/dataset/interrupt_test.cc +++ b/tests/ut/cpp/dataset/interrupt_test.cc @@ -15,10 +15,10 @@ */ #include "common/common.h" #include "utils/log_adapter.h" -#include "dataset/util/services.h" -#include "dataset/util/intrp_service.h" -#include "dataset/util/task_manager.h" -#include "dataset/util/queue.h" +#include "minddata/dataset/util/services.h" +#include "minddata/dataset/util/intrp_service.h" +#include "minddata/dataset/util/task_manager.h" +#include "minddata/dataset/util/queue.h" using namespace mindspore::dataset; using mindspore::MsLogLevel::INFO; diff --git 
a/tests/ut/cpp/dataset/jieba_tokenizer_op_test.cc b/tests/ut/cpp/dataset/jieba_tokenizer_op_test.cc index c5a733f2856..85b3384d364 100644 --- a/tests/ut/cpp/dataset/jieba_tokenizer_op_test.cc +++ b/tests/ut/cpp/dataset/jieba_tokenizer_op_test.cc @@ -18,7 +18,7 @@ #include #include "common/common.h" -#include "dataset/text/kernels/jieba_tokenizer_op.h" +#include "minddata/dataset/text/kernels/jieba_tokenizer_op.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" @@ -39,21 +39,22 @@ TEST_F(MindDataTestJiebaTokenizerOp, TestJieba_opFuntions) { std::string dataset_path = datasets_root_path_ + "/jiebadict"; std::string hmm_path = dataset_path + "/hmm_model.utf8"; std::string mp_path = dataset_path + "/jieba.dict.utf8"; - std::shared_ptr output_tensor; + TensorRow input, output; std::unique_ptr op(new JiebaTokenizerOp(hmm_path, mp_path)); std::shared_ptr input_tensor = std::make_shared("今天天气太好了我们一起去外面玩吧"); - Status s = op->Compute(input_tensor, &output_tensor); + input.push_back(input_tensor); + Status s = op->Compute(input, &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output_tensor->Rank(), 1); - EXPECT_EQ(output_tensor->Size(), 7); - CheckEqual(output_tensor, {0}, "今天天气"); - CheckEqual(output_tensor, {1}, "太好了"); - CheckEqual(output_tensor, {2}, "我们"); - CheckEqual(output_tensor, {3}, "一起"); - CheckEqual(output_tensor, {4}, "去"); - CheckEqual(output_tensor, {5}, "外面"); - CheckEqual(output_tensor, {6}, "玩吧"); + EXPECT_EQ(output[0]->Rank(), 1); + EXPECT_EQ(output[0]->Size(), 7); + CheckEqual(output[0], {0}, "今天天气"); + CheckEqual(output[0], {1}, "太好了"); + CheckEqual(output[0], {2}, "我们"); + CheckEqual(output[0], {3}, "一起"); + CheckEqual(output[0], {4}, "去"); + CheckEqual(output[0], {5}, "外面"); + CheckEqual(output[0], {6}, "玩吧"); } TEST_F(MindDataTestJiebaTokenizerOp, TestJieba_opAdd) { @@ -61,16 +62,17 @@ TEST_F(MindDataTestJiebaTokenizerOp, TestJieba_opAdd) { std::string dataset_path = datasets_root_path_ + "/jiebadict"; std::string hmm_path = dataset_path + 
"/hmm_model.utf8"; std::string mp_path = dataset_path + "/jieba.dict.utf8"; - std::shared_ptr output_tensor; + TensorRow input, output; std::unique_ptr op(new JiebaTokenizerOp(hmm_path, mp_path)); op->AddWord("男默女泪"); std::shared_ptr input_tensor = std::make_shared("男默女泪"); - Status s = op->Compute(input_tensor, &output_tensor); + input.push_back(input_tensor); + Status s = op->Compute(input, &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output_tensor->Rank(), 1); - EXPECT_EQ(output_tensor->Size(), 1); - CheckEqual(output_tensor, {0}, "男默女泪"); + EXPECT_EQ(output[0]->Rank(), 1); + EXPECT_EQ(output[0]->Size(), 1); + CheckEqual(output[0], {0}, "男默女泪"); } TEST_F(MindDataTestJiebaTokenizerOp, TestJieba_opEmpty) { @@ -78,14 +80,15 @@ TEST_F(MindDataTestJiebaTokenizerOp, TestJieba_opEmpty) { std::string dataset_path = datasets_root_path_ + "/jiebadict"; std::string hmm_path = dataset_path + "/hmm_model.utf8"; std::string mp_path = dataset_path + "/jieba.dict.utf8"; - std::shared_ptr output_tensor; + TensorRow input, output; std::unique_ptr op(new JiebaTokenizerOp(hmm_path, mp_path)); op->AddWord("男默女泪"); std::shared_ptr input_tensor = std::make_shared(""); - Status s = op->Compute(input_tensor, &output_tensor); + input.push_back(input_tensor); + Status s = op->Compute(input, &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output_tensor->Rank(), 1); - EXPECT_EQ(output_tensor->Size(), 1); - CheckEqual(output_tensor, {0}, ""); + EXPECT_EQ(output[0]->Rank(), 1); + EXPECT_EQ(output[0]->Size(), 1); + CheckEqual(output[0], {0}, ""); } \ No newline at end of file diff --git a/tests/ut/cpp/dataset/manifest_op_test.cc b/tests/ut/cpp/dataset/manifest_op_test.cc index 6317a6a3459..a6eef4aaa24 100644 --- a/tests/ut/cpp/dataset/manifest_op_test.cc +++ b/tests/ut/cpp/dataset/manifest_op_test.cc @@ -20,12 +20,12 @@ #include "common/common.h" #include "common/utils.h" -#include "dataset/core/client.h" -#include "dataset/core/global_context.h" -#include 
"dataset/engine/datasetops/source/manifest_op.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/datasetops/source/manifest_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/util/status.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" #include "securec.h" diff --git a/tests/ut/cpp/dataset/map_op_test.cc b/tests/ut/cpp/dataset/map_op_test.cc index 8b6a1524889..4e9cfe9ec95 100644 --- a/tests/ut/cpp/dataset/map_op_test.cc +++ b/tests/ut/cpp/dataset/map_op_test.cc @@ -17,13 +17,14 @@ #include #include + #include "common/common.h" -#include "dataset/core/client.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/datasetops/source/image_folder_op.h" -#include "dataset/kernels/image/decode_op.h" -#include "dataset/kernels/image/resize_op.h" -#include "dataset/kernels/tensor_op.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/datasetops/source/image_folder_op.h" +#include "minddata/dataset/kernels/image/decode_op.h" +#include "minddata/dataset/kernels/image/resize_op.h" +#include "minddata/dataset/kernels/tensor_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; @@ -35,93 +36,99 @@ namespace dataset { namespace test { class NoOp : public TensorOp { public: - NoOp() {}; + NoOp(){}; - ~NoOp() {}; + ~NoOp(){}; - Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override { - *output = std::move(input); - return Status::OK(); - }; + Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override { + *output = 
std::move(input); + return Status::OK(); + }; - void Print(std::ostream &out) const override { out << "NoOp"; }; + void Print(std::ostream &out) const override { out << "NoOp"; }; + + std::string Name() const override { return kNoOp; } }; class ThreeToOneOp : public TensorOp { public: - ThreeToOneOp() {}; + ThreeToOneOp(){}; - ~ThreeToOneOp() {}; + ~ThreeToOneOp(){}; - uint32_t NumInput() override { return 3; } - // Compute function that holds the actual implementation of the operation. - Status Compute(const TensorRow &input, TensorRow *output) override { - output->push_back(input[0]); - return Status::OK(); - }; + uint32_t NumInput() override { return 3; } + // Compute function that holds the actual implementation of the operation. + Status Compute(const TensorRow &input, TensorRow *output) override { + output->push_back(input[0]); + return Status::OK(); + }; - void Print(std::ostream &out) const override { out << "ThreeToOneOp"; }; + void Print(std::ostream &out) const override { out << "ThreeToOneOp"; }; + + std::string Name() const override { return "ThreeToOneOp"; } }; class OneToThreeOp : public TensorOp { public: - OneToThreeOp() {}; + OneToThreeOp(){}; - ~OneToThreeOp() {}; + ~OneToThreeOp(){}; uint32_t NumOutput() override { return 3; } - // Compute function that holds the actual implementation of the operation. - // Simply pushing the same shared pointer of the first element of input vector three times. - Status Compute(const TensorRow &input, TensorRow *output) override { - output->push_back(input[0]); - output->push_back(input[0]); - output->push_back(input[0]); - return Status::OK(); - }; + // Compute function that holds the actual implementation of the operation. + // Simply pushing the same shared pointer of the first element of input vector three times. 
+ Status Compute(const TensorRow &input, TensorRow *output) override { + output->push_back(input[0]); + output->push_back(input[0]); + output->push_back(input[0]); + return Status::OK(); + }; - void Print(std::ostream &out) const override { out << "OneToThreeOp"; }; + void Print(std::ostream &out) const override { out << "OneToThreeOp"; }; + + std::string Name() const override { return "OneToThreeOp"; }; }; } // namespace test } // namespace dataset } // namespace mindspore - class MindDataTestMapOp : public UT::DatasetOpTesting { public: - void SetUp() override { - DatasetOpTesting::SetUp(); - dataset_path_ = datasets_root_path_ + "" + "/testDataset2/testDataset2.data"; - schema_path_ = datasets_root_path_ + "" + "/testDataset2/datasetSchema.json"; + void SetUp() override { + DatasetOpTesting::SetUp(); + dataset_path_ = datasets_root_path_ + "" + "/testDataset2/testDataset2.data"; + schema_path_ = datasets_root_path_ + "" + "/testDataset2/datasetSchema.json"; - GlobalInit(); + GlobalInit(); - // Start with an empty execution tree - my_tree_ = std::make_shared(); - } + // Start with an empty execution tree + my_tree_ = std::make_shared(); + } - std::shared_ptr CreateTFReaderOp() { - std::shared_ptr my_tfreader_op; - TFReaderOp::Builder builder; - builder.SetDatasetFilesList({dataset_path_}) - .SetColumnsToLoad({"image", "label", "A", "B"}) - .SetRowsPerBuffer(2) - .SetWorkerConnectorSize(2) - .SetNumWorkers(2); + std::shared_ptr CreateTFReaderOp() { + std::shared_ptr my_tfreader_op; + TFReaderOp::Builder builder; + builder.SetDatasetFilesList({dataset_path_}) + .SetColumnsToLoad({"image", "label", "A", "B"}) + .SetRowsPerBuffer(2) + .SetWorkerConnectorSize(2) + .SetNumWorkers(2); - std::unique_ptr schema = std::make_unique(); - schema->LoadSchemaFile(schema_path_, {}); - builder.SetDataSchema(std::move(schema)); + std::unique_ptr schema = std::make_unique(); + schema->LoadSchemaFile(schema_path_, {}); + builder.SetDataSchema(std::move(schema)); - Status rc = 
builder.Build(&my_tfreader_op); - EXPECT_TRUE(rc.IsOk()); - return my_tfreader_op; - } + Status rc = builder.Build(&my_tfreader_op); + EXPECT_TRUE(rc.IsOk()); + return my_tfreader_op; + } + + std::shared_ptr my_tree_; - std::shared_ptr my_tree_; private: - std::string dataset_path_; - std::string schema_path_; + std::string dataset_path_; + std::string schema_path_; }; std::shared_ptr ImageFolder(int64_t num_works, int64_t rows, int64_t conns, std::string path, @@ -148,10 +155,7 @@ TEST_F(MindDataTestMapOp, TestAsMap) { my_func_list.push_back(my_no_op); std::shared_ptr my_map_op; MapOp::Builder builder; - builder.SetInColNames({"image"}) - .SetOutColNames({"X"}) - .SetTensorFuncs(std::move(my_func_list)) - .SetNumWorkers(1); + builder.SetInColNames({"image"}).SetOutColNames({"X"}).SetTensorFuncs(std::move(my_func_list)).SetNumWorkers(1); rc = builder.Build(&my_map_op); rc = my_tree_->AssociateNode(my_map_op); EXPECT_TRUE(rc.IsOk()); @@ -200,9 +204,9 @@ TEST_F(MindDataTestMapOp, Test3to1) { std::shared_ptr my_map_op; MapOp::Builder builder; builder.SetInColNames({"image", "A", "B"}) - .SetOutColNames({"X"}) - .SetTensorFuncs(std::move(my_func_list)) - .SetNumWorkers(1); + .SetOutColNames({"X"}) + .SetTensorFuncs(std::move(my_func_list)) + .SetNumWorkers(1); rc = builder.Build(&my_map_op); EXPECT_TRUE(rc.IsOk()); rc = my_tree_->AssociateNode(my_map_op); @@ -252,10 +256,9 @@ TEST_F(MindDataTestMapOp, Test1to3) { std::shared_ptr my_map_op; MapOp::Builder builder; builder.SetInColNames({"image"}) - .SetOutColNames({"X", "Y", "Z"}) - .SetTensorFuncs(std::move(my_func_list)) - .SetNumWorkers(1); - + .SetOutColNames({"X", "Y", "Z"}) + .SetTensorFuncs(std::move(my_func_list)) + .SetNumWorkers(1); // ProjectOp std::vector columns_to_project = {"X", "Y", "Z", "label", "A", "B"}; @@ -296,19 +299,18 @@ TEST_F(MindDataTestMapOp, Test1to3) { // Getting the next row as vector (by position). 
TensorRow tensor_list; - rc =di.FetchNextTensorRow(&tensor_list); + rc = di.FetchNextTensorRow(&tensor_list); EXPECT_TRUE(rc.IsOk()); // Based on the schema file, create the golden result to compare with. std::vector golden_types({DataType::Type::DE_UINT8, DataType::Type::DE_UINT8, DataType::Type::DE_UINT8, DataType::Type::DE_INT64, - DataType::Type::DE_FLOAT32, DataType::Type::DE_INT64} - ); + DataType::Type::DE_FLOAT32, DataType::Type::DE_INT64}); std::vector golden_ranks({3, 3, 3, 1, 4, 1}); std::vector golden_shapes({TensorShape({3, 4, 2}), TensorShape({3, 4, 2}), TensorShape({3, 4, 2}), - TensorShape({7}), TensorShape({1, 13, 14, 12}), TensorShape({9})} ); + TensorShape({7}), TensorShape({1, 13, 14, 12}), TensorShape({9})}); while (!tensor_list.empty()) { for (uint32_t i = 0; i < tensor_list.size(); i++) { @@ -343,9 +345,9 @@ TEST_F(MindDataTestMapOp, TestMultiTensorOp) { std::shared_ptr my_map_op; MapOp::Builder builder; builder.SetInColNames({"image", "A", "B"}) - .SetOutColNames({"X", "Y", "Z"}) - .SetTensorFuncs(std::move(my_func_list)) - .SetNumWorkers(1); + .SetOutColNames({"X", "Y", "Z"}) + .SetTensorFuncs(std::move(my_func_list)) + .SetNumWorkers(1); rc = builder.Build(&my_map_op); EXPECT_TRUE(rc.IsOk()); rc = my_tree_->AssociateNode(my_map_op); @@ -405,10 +407,7 @@ TEST_F(MindDataTestMapOp, TestTFReaderRepeatMap) { std::shared_ptr my_map_op; MapOp::Builder builder; - builder.SetInColNames({"label"}) - .SetOutColNames({}) - .SetTensorFuncs(std::move(my_func_list)) - .SetNumWorkers(5); + builder.SetInColNames({"label"}).SetOutColNames({}).SetTensorFuncs(std::move(my_func_list)).SetNumWorkers(5); rc = builder.Build(&my_map_op); EXPECT_TRUE(rc.IsOk()); rc = my_tree_->AssociateNode(my_map_op); @@ -440,7 +439,6 @@ TEST_F(MindDataTestMapOp, TestTFReaderRepeatMap) { MS_LOG(INFO) << "row_count: " << row_count << "."; rc = di.FetchNextTensorRow(&tensor_list); EXPECT_TRUE(rc.IsOk()); - } ASSERT_EQ(row_count, 10 * num_repeats); } @@ -467,10 +465,7 @@ 
TEST_F(MindDataTestMapOp, TestTFReaderMapRepeat) { std::shared_ptr my_map_op; MapOp::Builder builder; - builder.SetInColNames({"label"}) - .SetOutColNames({}) - .SetTensorFuncs(std::move(my_func_list)) - .SetNumWorkers(50); + builder.SetInColNames({"label"}).SetOutColNames({}).SetTensorFuncs(std::move(my_func_list)).SetNumWorkers(50); rc = builder.Build(&my_map_op); EXPECT_TRUE(rc.IsOk()); rc = my_tree_->AssociateNode(my_map_op); @@ -536,25 +531,18 @@ TEST_F(MindDataTestMapOp, TFReader_Decode_Repeat_Resize) { std::shared_ptr my_map_decode_op; MapOp::Builder builder; - builder.SetInColNames({"image"}) - .SetOutColNames({}) - .SetTensorFuncs(std::move(my_func_list)) - .SetNumWorkers(4); + builder.SetInColNames({"image"}).SetOutColNames({}).SetTensorFuncs(std::move(my_func_list)).SetNumWorkers(4); rc = builder.Build(&my_map_decode_op); EXPECT_TRUE(rc.IsOk()); rc = my_tree_->AssociateNode(my_map_decode_op); EXPECT_TRUE(rc.IsOk()); - auto resize_op = std::make_shared(300, 300); std::vector> my_func_list2; my_func_list2.push_back(resize_op); std::shared_ptr my_map_resize_op; MapOp::Builder builder2; - builder2.SetInColNames({"image"}) - .SetOutColNames({}) - .SetTensorFuncs(std::move(my_func_list2)) - .SetNumWorkers(5); + builder2.SetInColNames({"image"}).SetOutColNames({}).SetTensorFuncs(std::move(my_func_list2)).SetNumWorkers(5); rc = builder2.Build(&my_map_resize_op); EXPECT_TRUE(rc.IsOk()); rc = my_tree_->AssociateNode(my_map_resize_op); @@ -610,10 +598,7 @@ TEST_F(MindDataTestMapOp, ImageFolder_Decode_Repeat_Resize) { std::shared_ptr map_decode_map; MapOp::Builder map_decode_builder; - map_decode_builder.SetInColNames({"image"}) - .SetOutColNames({}) - .SetTensorFuncs(func_list) - .SetNumWorkers(4); + map_decode_builder.SetInColNames({"image"}).SetOutColNames({}).SetTensorFuncs(func_list).SetNumWorkers(4); rc = map_decode_builder.Build(&map_decode_map); EXPECT_TRUE(rc.IsOk()); @@ -622,10 +607,7 @@ TEST_F(MindDataTestMapOp, ImageFolder_Decode_Repeat_Resize) { 
func_list2.push_back(resize_op); std::shared_ptr map_resize_op; MapOp::Builder map_resize_builder; - map_resize_builder.SetInColNames({"image"}) - .SetOutColNames({}) - .SetTensorFuncs(func_list2) - .SetNumWorkers(5); + map_resize_builder.SetInColNames({"image"}).SetOutColNames({}).SetTensorFuncs(func_list2).SetNumWorkers(5); rc = map_resize_builder.Build(&map_resize_op); EXPECT_TRUE(rc.IsOk()); @@ -704,7 +686,6 @@ TEST_F(MindDataTestMapOp, ImageFolder_Decode_Repeat_Resize) { EXPECT_EQ(result, result2); } - TEST_F(MindDataTestMapOp, ImageFolder_Decode_Repeat_Resize_NoInputColumns) { Status rc; MS_LOG(INFO) << "Doing ImageFolder_Decode_Repeat_Resize_NoInputColumns."; @@ -722,10 +703,7 @@ TEST_F(MindDataTestMapOp, ImageFolder_Decode_Repeat_Resize_NoInputColumns) { std::shared_ptr map_decode_map; MapOp::Builder map_decode_builder; - map_decode_builder.SetInColNames({}) - .SetOutColNames({}) - .SetTensorFuncs(func_list) - .SetNumWorkers(4); + map_decode_builder.SetInColNames({}).SetOutColNames({}).SetTensorFuncs(func_list).SetNumWorkers(4); rc = map_decode_builder.Build(&map_decode_map); EXPECT_TRUE(rc.IsOk()); @@ -761,3 +739,5 @@ TEST_F(MindDataTestMapOp, ImageFolder_Decode_Repeat_Resize_NoInputColumns) { } EXPECT_TRUE(i == 88); } + + diff --git a/tests/ut/cpp/dataset/mask_test.cc b/tests/ut/cpp/dataset/mask_test.cc index 9ff5f51fce1..609d5bf4477 100644 --- a/tests/ut/cpp/dataset/mask_test.cc +++ b/tests/ut/cpp/dataset/mask_test.cc @@ -15,15 +15,15 @@ */ #include #include -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "gtest/gtest.h" #include "securec.h" -#include "dataset/core/tensor.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/core/data_type.h" -#include "dataset/kernels/data/mask_op.h" -#include "dataset/kernels/data/data_utils.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/core/data_type.h" +#include 
"minddata/dataset/kernels/data/mask_op.h" +#include "minddata/dataset/kernels/data/data_utils.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/memory_pool_test.cc b/tests/ut/cpp/dataset/memory_pool_test.cc index 136f3fe1b85..b5907655dc7 100644 --- a/tests/ut/cpp/dataset/memory_pool_test.cc +++ b/tests/ut/cpp/dataset/memory_pool_test.cc @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "dataset/util/memory_pool.h" -#include "dataset/util/circular_pool.h" -#include "dataset/util/system_pool.h" -#include "dataset/util/allocator.h" +#include "minddata/dataset/util/memory_pool.h" +#include "minddata/dataset/util/circular_pool.h" +#include "minddata/dataset/util/system_pool.h" +#include "minddata/dataset/util/allocator.h" #include "common/common.h" #include "gtest/gtest.h" diff --git a/tests/ut/cpp/dataset/mind_record_op_test.cc b/tests/ut/cpp/dataset/mind_record_op_test.cc index b2cbdf027e3..c9067535d6e 100644 --- a/tests/ut/cpp/dataset/mind_record_op_test.cc +++ b/tests/ut/cpp/dataset/mind_record_op_test.cc @@ -16,14 +16,14 @@ #include #include #include -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "common/utils.h" #include "gtest/gtest.h" -#include "mindrecord/include/shard_category.h" -#include "mindrecord/include/shard_error.h" -#include "mindrecord/include/shard_sample.h" -#include "mindrecord/include/shard_shuffle.h" +#include "minddata/mindrecord/include/shard_category.h" +#include "minddata/mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_sample.h" +#include "minddata/mindrecord/include/shard_shuffle.h" #include "utils/log_adapter.h" namespace common = mindspore::common; diff --git a/tests/ut/cpp/dataset/mnist_op_test.cc b/tests/ut/cpp/dataset/mnist_op_test.cc index da78cb6f7fc..dfceeaa06a8 100644 --- a/tests/ut/cpp/dataset/mnist_op_test.cc +++ b/tests/ut/cpp/dataset/mnist_op_test.cc @@ -20,18 +20,18 @@ #include 
"common/utils.h" #include "common/common.h" -#include "dataset/core/client.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/datasetops/source/mnist_op.h" -#include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" -#include "dataset/engine/datasetops/source/sampler/pk_sampler.h" -#include "dataset/engine/datasetops/source/sampler/random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" -#include "dataset/util/path.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/datasetops/source/mnist_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/status.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" #include "securec.h" diff --git a/tests/ut/cpp/dataset/normalize_op_test.cc b/tests/ut/cpp/dataset/normalize_op_test.cc index 05ac3f62898..31791e0e666 100644 --- a/tests/ut/cpp/dataset/normalize_op_test.cc +++ b/tests/ut/cpp/dataset/normalize_op_test.cc @@ -15,8 +15,8 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/normalize_op.h" 
-#include "dataset/core/cv_tensor.h" +#include "minddata/dataset/kernels/image/normalize_op.h" +#include "minddata/dataset/core/cv_tensor.h" #include "utils/log_adapter.h" #include diff --git a/tests/ut/cpp/dataset/one_hot_op_test.cc b/tests/ut/cpp/dataset/one_hot_op_test.cc index c414e371e5a..2617ae4536f 100644 --- a/tests/ut/cpp/dataset/one_hot_op_test.cc +++ b/tests/ut/cpp/dataset/one_hot_op_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ #include "common/common.h" -#include "dataset/kernels/data/one_hot_op.h" +#include "minddata/dataset/kernels/data/one_hot_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/pad_end_op_test.cc b/tests/ut/cpp/dataset/pad_end_op_test.cc index 2787501aa9e..1c838da8e86 100644 --- a/tests/ut/cpp/dataset/pad_end_op_test.cc +++ b/tests/ut/cpp/dataset/pad_end_op_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ #include "common/common.h" -#include "dataset/kernels/data/pad_end_op.h" +#include "minddata/dataset/kernels/data/pad_end_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/pad_op_test.cc b/tests/ut/cpp/dataset/pad_op_test.cc index b659d009f37..e2bd822d029 100644 --- a/tests/ut/cpp/dataset/pad_op_test.cc +++ b/tests/ut/cpp/dataset/pad_op_test.cc @@ -15,7 +15,7 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/pad_op.h" +#include "minddata/dataset/kernels/image/pad_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/path_test.cc b/tests/ut/cpp/dataset/path_test.cc index 4cf3b179683..b36b38bbc70 100644 --- a/tests/ut/cpp/dataset/path_test.cc +++ b/tests/ut/cpp/dataset/path_test.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/util/path.h" +#include "minddata/dataset/util/path.h" #include "common/common.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/perf_data_test.cc b/tests/ut/cpp/dataset/perf_data_test.cc index 048ee1f21a5..486209be218 100644 --- a/tests/ut/cpp/dataset/perf_data_test.cc +++ b/tests/ut/cpp/dataset/perf_data_test.cc @@ -17,8 +17,8 @@ #include "common/cvop_common.h" #include "gtest/gtest.h" #include "securec.h" -#include "dataset/engine/perf/cyclic_array.h" -#include "dataset/engine/perf/perf_data.h" +#include "minddata/dataset/engine/perf/cyclic_array.h" +#include "minddata/dataset/engine/perf/perf_data.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/project_op_test.cc b/tests/ut/cpp/dataset/project_op_test.cc index 484396321c8..45ef11b88f8 100644 --- a/tests/ut/cpp/dataset/project_op_test.cc +++ b/tests/ut/cpp/dataset/project_op_test.cc @@ -19,7 +19,7 @@ #include "common/common.h" #include "common/utils.h" -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/queue_test.cc b/tests/ut/cpp/dataset/queue_test.cc index 578405e5370..ec40cc2ae48 100644 --- a/tests/ut/cpp/dataset/queue_test.cc +++ b/tests/ut/cpp/dataset/queue_test.cc @@ -16,9 +16,11 @@ #include "common/common.h" #include "gtest/gtest.h" -#include "dataset/util/task_manager.h" -#include "dataset/util/queue.h" +#include "minddata/dataset/util/task_manager.h" +#include "minddata/dataset/util/queue.h" #include +#include +#include #include "utils/log_adapter.h" using namespace mindspore::dataset; @@ -39,7 +41,7 @@ class RefCount { public: RefCount() : v_(nullptr) {} explicit RefCount(int x) : v_(std::make_shared(x)) {} - explicit RefCount(const RefCount &o) : v_(o.v_) {} + RefCount(const RefCount &o) : v_(o.v_) {} ~RefCount() { MS_LOG(DEBUG) << "Destructor of RefCount called" << std::endl; 
gRefCountDestructorCalled++; @@ -167,3 +169,70 @@ TEST_F(MindDataTestQueue, Test6) { MS_LOG(INFO) << "Popped value " << *pepped_value << " from queue index " << chosen_queue_index; ASSERT_EQ(*pepped_value, 99); } +using namespace std::chrono; +template +void Perf(int n, int p, std::string name) { + auto payload = std::vector(n, PayloadType(p)); + auto queue = QueueType(n); + auto t0 = high_resolution_clock::now(); + auto check = 0; + for (int i = 0; i < queue.capacity(); i++) { + queue.Add(PayloadType(p)); + } + check = queue.size(); + for (int i = 0; i < queue.capacity(); i++) { + queue.PopFront(&payload[i]); + } + auto t1 = high_resolution_clock::now(); + std::cout << name << " queue filled size: " << queue.size() << " " << check << std::endl; + auto t2 = high_resolution_clock::now(); + for (int i = 0; i < queue.capacity(); i++) { + queue.Add(PayloadType(p)); + } + check = queue.size(); + for (int i = 0; i < queue.capacity(); i++) { + queue.PopFront(&payload[i]); + } + auto t3 = high_resolution_clock::now(); + auto d = duration_cast(t3 - t2 + t1 - t0).count(); + std::cout << name << " queue emptied size: " << queue.size() << " " << check << std::endl; + std::cout << name << " " + << " ran in " << d << "ms" << std::endl; +} + +template +void Fuzz(int n, int p, std::string name) { + std::mt19937 gen(1); + auto payload = std::vector(n, PayloadType(p)); + auto queue = QueueType(n); + auto dist = std::uniform_int_distribution(0, 2); + std::cout << "###" << std::endl; + for (auto i = 0; i < n; i++) { + auto v = dist(gen); + if (v == 0 && queue.size() < n - 1) { + queue.Add(std::move(payload[i])); + } + if (v == 1 && queue.size() > 0) { + queue.PopFront(&payload[i]); + } else { + queue.Reset(); + } + } + std::cout << name << " fuzz ran " << queue.size() << std::endl; +} +TEST_F(MindDataTestQueue, TestPerf) { + try { + int kSz = 1000000; + // std::cout << "enter size" << std::endl; + // std::cin >> kSz; + Perf>, std::vector>(kSz, 1, "old queue, vector of size 1"); + } 
catch (const std::exception &e) { + std::cout << e.what() << std::endl; + } + + std::cout << "Test Reset" << std::endl; + std::cout << "Enter fuzz size" << std::endl; + int fs = 1000; +// std::cin >> fs; + Fuzz>, std::vector>(fs, 1, "New queue"); +} diff --git a/tests/ut/cpp/dataset/random_color_adjust_op_test.cc b/tests/ut/cpp/dataset/random_color_adjust_op_test.cc index 82df108ad1e..96f4dd81457 100644 --- a/tests/ut/cpp/dataset/random_color_adjust_op_test.cc +++ b/tests/ut/cpp/dataset/random_color_adjust_op_test.cc @@ -15,8 +15,8 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/random_color_adjust_op.h" -#include "dataset/core/cv_tensor.h" +#include "minddata/dataset/kernels/image/random_color_adjust_op.h" +#include "minddata/dataset/core/cv_tensor.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc b/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc index 3d5298b0718..fd59a901172 100644 --- a/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc +++ b/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc @@ -16,7 +16,7 @@ #include "common/common.h" #include "common/cvop_common.h" #include -#include "dataset/kernels/image/random_crop_and_resize_op.h" +#include "minddata/dataset/kernels/image/random_crop_and_resize_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/random_crop_and_resize_with_bbox_op_test.cc b/tests/ut/cpp/dataset/random_crop_and_resize_with_bbox_op_test.cc new file mode 100644 index 00000000000..4efdcb8b786 --- /dev/null +++ b/tests/ut/cpp/dataset/random_crop_and_resize_with_bbox_op_test.cc @@ -0,0 +1,99 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "common/bboxop_common.h" +#include "minddata/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h" +#include "utils/log_adapter.h" + +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/global_context.h" + +using namespace mindspore::dataset; +using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::INFO; + +const bool kSaveExpected = false; +const char kOpName[] = "RandomResizedCropWithBBox_C"; + +class MindDataTestRandomCropAndResizeWithBBoxOp : public UT::CVOP::BBOXOP::BBoxOpCommon { + protected: + MindDataTestRandomCropAndResizeWithBBoxOp() : BBoxOpCommon() {} +}; + +TEST_F(MindDataTestRandomCropAndResizeWithBBoxOp, TestOp1) { + MS_LOG(INFO) << "Doing testRandomCropAndResizeWithBBoxOp1."; + // setting seed here + uint32_t current_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(327362); + TensorRow output_tensor_row_; + TensorTable results; + int h_out = 1024; + int w_out = 2048; + float aspect_lb = 2; + float aspect_ub = 2.5; + float scale_lb = 0.2; + float scale_ub = 2.0; + auto op = std::make_unique(h_out, w_out, scale_lb, scale_ub, aspect_lb, aspect_ub); + Status s; + for (auto tensor_row_ : images_and_annotations_) { + s = op->Compute(tensor_row_, &output_tensor_row_); + EXPECT_TRUE(s.IsOk()); + results.push_back(output_tensor_row_); + } + if (kSaveExpected) { + SaveImagesWithAnnotations(FileType::kExpected, std::string(kOpName), results); + } + SaveImagesWithAnnotations(FileType::kActual, 
std::string(kOpName), results); + if (!kSaveExpected) { + CompareActualAndExpected(std::string(kOpName)); + } + GlobalContext::config_manager()->set_seed(current_seed); +} + +TEST_F(MindDataTestRandomCropAndResizeWithBBoxOp, TestOp2) { + MS_LOG(INFO) << "Doing testRandomCropAndResizeWithBBoxOp2."; + TensorRow output_tensor_row_; + int h_out = 1024; + int w_out = 2048; + float aspect_lb = 1; + float aspect_ub = 1.5; + float scale_lb = 0.2; + float scale_ub = 2.0; + auto op = std::make_unique(h_out, w_out, scale_lb, scale_ub, aspect_lb, aspect_ub); + Status s; + for (auto tensor_row_ : images_and_annotations_) { + s = op->Compute(tensor_row_, &output_tensor_row_); + EXPECT_TRUE(s.IsOk()); + } +} + +TEST_F(MindDataTestRandomCropAndResizeWithBBoxOp, TestOp3) { + MS_LOG(INFO) << "Doing testRandomCropAndResizeWithBBoxOp3."; + TensorRow output_tensor_row_; + int h_out = 1024; + int w_out = 2048; + float aspect_lb = 0.2; + float aspect_ub = 3; + float scale_lb = 0.2; + float scale_ub = 2.0; + auto op = std::make_unique(h_out, w_out, scale_lb, scale_ub, aspect_lb, aspect_ub); + Status s; + for (auto tensor_row_ : images_and_annotations_) { + s = op->Compute(tensor_row_, &output_tensor_row_); + EXPECT_TRUE(s.IsOk()); + } + MS_LOG(INFO) << "testRandomCropAndResizeWithBBoxOp end."; +} \ No newline at end of file diff --git a/tests/ut/cpp/dataset/random_crop_decode_resize_op_test.cc b/tests/ut/cpp/dataset/random_crop_decode_resize_op_test.cc index 1c9f3a98dcb..170525b4e7d 100644 --- a/tests/ut/cpp/dataset/random_crop_decode_resize_op_test.cc +++ b/tests/ut/cpp/dataset/random_crop_decode_resize_op_test.cc @@ -16,10 +16,10 @@ #include #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/decode_op.h" -#include "dataset/kernels/image/random_crop_and_resize_op.h" -#include "dataset/kernels/image/random_crop_decode_resize_op.h" -#include "dataset/core/config_manager.h" +#include "minddata/dataset/kernels/image/decode_op.h" +#include 
"minddata/dataset/kernels/image/random_crop_and_resize_op.h" +#include "minddata/dataset/kernels/image/random_crop_decode_resize_op.h" +#include "minddata/dataset/core/config_manager.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; @@ -54,7 +54,7 @@ TEST_F(MindDataTestRandomCropDecodeResizeOp, TestOp2) { auto decode_and_crop = static_cast(crop_and_decode_copy); EXPECT_TRUE(crop_and_decode.OneToOne()); GlobalContext::config_manager()->set_seed(42); - for (int k = 0; k < 100; k++) { + for (int k = 0; k < 10; k++) { (void)crop_and_decode.Compute(raw_input_tensor_, &crop_and_decode_output); (void)decode_and_crop.Compute(input_tensor_, &decode_and_crop_output); cv::Mat output1 = CVTensor::AsCVTensor(crop_and_decode_output)->mat().clone(); @@ -104,10 +104,10 @@ TEST_F(MindDataTestRandomCropDecodeResizeOp, TestOp1) { int mse_sum, m1, m2, count; double mse; - for (int k = 0; k < 100; ++k) { + for (int k = 0; k < 10; ++k) { mse_sum = 0; count = 0; - for (auto i = 0; i < 100; i++) { + for (auto i = 0; i < 10; i++) { scale = rd_scale(rd); aspect = rd_aspect(rd); crop_width = std::round(std::sqrt(h * w * scale / aspect)); diff --git a/tests/ut/cpp/dataset/random_crop_op_test.cc b/tests/ut/cpp/dataset/random_crop_op_test.cc index 2f3b19e2f47..9c8f1f31ed2 100644 --- a/tests/ut/cpp/dataset/random_crop_op_test.cc +++ b/tests/ut/cpp/dataset/random_crop_op_test.cc @@ -15,7 +15,7 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/random_crop_op.h" +#include "minddata/dataset/kernels/image/random_crop_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/random_crop_with_bbox_op_test.cc b/tests/ut/cpp/dataset/random_crop_with_bbox_op_test.cc new file mode 100644 index 00000000000..fcf8ba2605f --- /dev/null +++ b/tests/ut/cpp/dataset/random_crop_with_bbox_op_test.cc @@ -0,0 +1,91 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the 
Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "common/bboxop_common.h" +#include "minddata/dataset/kernels/image/random_crop_with_bbox_op.h" +#include "utils/log_adapter.h" + +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/global_context.h" + +using namespace mindspore::dataset; +using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::INFO; + +const bool kSaveExpected = false; +const char kOpName[] = "RandomCropWithBBox_C"; + +class MindDataTestRandomCropWithBBoxOp : public UT::CVOP::BBOXOP::BBoxOpCommon { + protected: + MindDataTestRandomCropWithBBoxOp() : BBoxOpCommon() {} + TensorRow output_tensor_row_; +}; + +TEST_F(MindDataTestRandomCropWithBBoxOp, TestOp1) { + MS_LOG(INFO) << "Doing testRandomCropWithBBoxOp1."; + TensorTable results; + unsigned int crop_height = 128; + unsigned int crop_width = 128; + // setting seed here + uint32_t current_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(327362); + std::unique_ptr op( + new RandomCropWithBBoxOp(crop_height, crop_width, 0, 0, 0, 0, BorderType::kConstant, false)); + for (auto tensor_row_ : images_and_annotations_) { + Status s = op->Compute(tensor_row_, &output_tensor_row_); + size_t actual = 0; + if (s == Status::OK()) { + TensorShape get_shape = output_tensor_row_[0]->shape(); + actual = get_shape[0] * get_shape[1] * get_shape[2]; + results.push_back(output_tensor_row_); + } + 
EXPECT_EQ(actual, crop_height * crop_width * 3); + EXPECT_EQ(s, Status::OK()); + EXPECT_EQ(4, output_tensor_row_[1]->shape()[1]); // check for existence of 4 columns + // Compare Code + if (kSaveExpected) { + SaveImagesWithAnnotations(FileType::kExpected, std::string(kOpName), results); + } + SaveImagesWithAnnotations(FileType::kActual, std::string(kOpName), results); + if (!kSaveExpected) { + CompareActualAndExpected(std::string(kOpName)); + } + GlobalContext::config_manager()->set_seed(current_seed); + } +} + +TEST_F(MindDataTestRandomCropWithBBoxOp, TestOp2) { + MS_LOG(INFO) << "Doing testRandomCropWithBBoxOp2."; + // Crop params + unsigned int crop_height = 1280; + unsigned int crop_width = 1280; + std::unique_ptr op( + new RandomCropWithBBoxOp(crop_height, crop_width, 513, 513, 513, 513, BorderType::kConstant, false)); + + for (auto tensor_row_ : images_and_annotations_) { + Status s = op->Compute(tensor_row_, &output_tensor_row_); + size_t actual = 0; + if (s == Status::OK()) { + TensorShape get_shape = output_tensor_row_[0]->shape(); + actual = get_shape[0] * get_shape[1] * get_shape[2]; + } + EXPECT_EQ(actual, crop_height * crop_width * 3); + EXPECT_EQ(s, Status::OK()); + EXPECT_EQ(4, output_tensor_row_[1]->shape()[1]); // check for existence of 4 columns + } + MS_LOG(INFO) << "testRandomCropWithBBoxOp end."; +} diff --git a/tests/ut/cpp/dataset/random_data_op_test.cc b/tests/ut/cpp/dataset/random_data_op_test.cc index f8a7440c034..3cb7b57ad66 100644 --- a/tests/ut/cpp/dataset/random_data_op_test.cc +++ b/tests/ut/cpp/dataset/random_data_op_test.cc @@ -14,15 +14,15 @@ * limitations under the License. 
*/ -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "gtest/gtest.h" #include #include #include -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/datasetops/source/random_data_op.h" -#include "dataset/engine/data_schema.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/datasetops/source/random_data_op.h" +#include "minddata/dataset/engine/data_schema.h" using namespace mindspore::dataset; using mindspore::MsLogLevel::INFO; diff --git a/tests/ut/cpp/dataset/random_horizontal_flip_op_test.cc b/tests/ut/cpp/dataset/random_horizontal_flip_op_test.cc index eb2f7535549..bb4ba7498dd 100644 --- a/tests/ut/cpp/dataset/random_horizontal_flip_op_test.cc +++ b/tests/ut/cpp/dataset/random_horizontal_flip_op_test.cc @@ -15,7 +15,7 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/random_horizontal_flip_op.h" +#include "minddata/dataset/kernels/image/random_horizontal_flip_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/random_horizontal_flip_with_bbox_test.cc b/tests/ut/cpp/dataset/random_horizontal_flip_with_bbox_test.cc new file mode 100644 index 00000000000..ed4e866478c --- /dev/null +++ b/tests/ut/cpp/dataset/random_horizontal_flip_with_bbox_test.cc @@ -0,0 +1,50 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "common/bboxop_common.h" +#include "minddata/dataset/kernels/image/random_horizontal_flip_with_bbox_op.h" +#include "utils/log_adapter.h" + +using namespace mindspore::dataset; +using mindspore::MsLogLevel::INFO; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::LogStream; + +const bool kSaveExpected = false; +const char kOpName[] = "RandomHorizontalFlipWithBBox"; + +class MindDataTestRandomHorizontalFlipWithBBoxOp : public UT::CVOP::BBOXOP::BBoxOpCommon { + protected: + MindDataTestRandomHorizontalFlipWithBBoxOp() : UT::CVOP::BBOXOP::BBoxOpCommon() {} +}; + +TEST_F(MindDataTestRandomHorizontalFlipWithBBoxOp, TestOp) { + MS_LOG(INFO) << "Doing testRandomHorizontalFlipWithBBox."; + TensorTable results; + std::unique_ptr op(new RandomHorizontalFlipWithBBoxOp(1)); + for (const auto &row: images_and_annotations_) { + TensorRow output_row; + Status s = op->Compute(row, &output_row); + EXPECT_TRUE(s.IsOk()); + results.push_back(output_row); + } + if (kSaveExpected) { + SaveImagesWithAnnotations(FileType::kExpected, std::string(kOpName), results); + } + SaveImagesWithAnnotations(FileType::kActual , std::string(kOpName), results); + if (!kSaveExpected) { + CompareActualAndExpected(std::string(kOpName)); + } +} diff --git a/tests/ut/cpp/dataset/random_resize_op_test.cc b/tests/ut/cpp/dataset/random_resize_op_test.cc index ee185f2fc68..d9e85de6e53 100644 --- a/tests/ut/cpp/dataset/random_resize_op_test.cc +++ b/tests/ut/cpp/dataset/random_resize_op_test.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/kernels/image/random_resize_op.h" +#include "minddata/dataset/kernels/image/random_resize_op.h" #include "common/common.h" #include "common/cvop_common.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/random_resize_with_bbox_op_test.cc b/tests/ut/cpp/dataset/random_resize_with_bbox_op_test.cc new file mode 100644 index 00000000000..e106f57375f --- /dev/null +++ b/tests/ut/cpp/dataset/random_resize_with_bbox_op_test.cc @@ -0,0 +1,59 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "common/bboxop_common.h" +#include "minddata/dataset/kernels/image/random_resize_with_bbox_op.h" +#include "utils/log_adapter.h" + +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/global_context.h" + +using namespace mindspore::dataset; +using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::INFO; + +const bool kSaveExpected = false; +const char kOpName[] = "RandomResizeWithBBox_C"; + +class MindDataTestRandomResizeWithBBoxOp : public UT::CVOP::BBOXOP::BBoxOpCommon { + protected: + MindDataTestRandomResizeWithBBoxOp() : BBoxOpCommon() {} +}; +TEST_F(MindDataTestRandomResizeWithBBoxOp, TestOp) { + MS_LOG(INFO) << "Doing testRandomResizeWithBBox."; + //setting seed here + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(120); + TensorTable results; + std::unique_ptr op(new RandomResizeWithBBoxOp(500)); + for (const auto &tensor_row_ : images_and_annotations_) { + // selected a tensorRow + TensorRow output_row; + Status s = op->Compute(tensor_row_, &output_row); + EXPECT_TRUE(s.IsOk()); + results.push_back(output_row); + } + if (kSaveExpected) { + SaveImagesWithAnnotations(FileType::kExpected, std::string(kOpName), results); + } + SaveImagesWithAnnotations(FileType::kActual, std::string(kOpName), results); + if (!kSaveExpected) { + CompareActualAndExpected(std::string(kOpName)); + } + GlobalContext::config_manager()->set_seed(curr_seed); + MS_LOG(INFO) << "testRandomResizeWithBBox end."; +} diff --git a/tests/ut/cpp/dataset/random_rotation_op_test.cc b/tests/ut/cpp/dataset/random_rotation_op_test.cc index 8b82ef1dcd0..a6eb5a1ff39 100644 --- a/tests/ut/cpp/dataset/random_rotation_op_test.cc +++ b/tests/ut/cpp/dataset/random_rotation_op_test.cc @@ -16,8 +16,8 @@ #include #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/random_rotation_op.h" -#include 
"dataset/core/cv_tensor.h" +#include "minddata/dataset/kernels/image/random_rotation_op.h" +#include "minddata/dataset/core/cv_tensor.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/random_vertical_flip_op_test.cc b/tests/ut/cpp/dataset/random_vertical_flip_op_test.cc index a2583cab967..db8cc89893e 100644 --- a/tests/ut/cpp/dataset/random_vertical_flip_op_test.cc +++ b/tests/ut/cpp/dataset/random_vertical_flip_op_test.cc @@ -15,7 +15,7 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/random_vertical_flip_op.h" +#include "minddata/dataset/kernels/image/random_vertical_flip_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/random_vertical_flip_with_bbox_op_test.cc b/tests/ut/cpp/dataset/random_vertical_flip_with_bbox_op_test.cc new file mode 100644 index 00000000000..d1946ef700f --- /dev/null +++ b/tests/ut/cpp/dataset/random_vertical_flip_with_bbox_op_test.cc @@ -0,0 +1,51 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "common/bboxop_common.h" +#include "minddata/dataset/kernels/image/random_vertical_flip_with_bbox_op.h" +#include "utils/log_adapter.h" + +using namespace mindspore::dataset; +using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::INFO; + +const bool kSaveExpected = false; +const char kOpName[] = "RandomVerticalFlipWithBBox_C"; + +class MindDataTestRandomVerticalFlipWithBBoxOp : public UT::CVOP::BBOXOP::BBoxOpCommon { + protected: + MindDataTestRandomVerticalFlipWithBBoxOp() : BBoxOpCommon() {} +}; +TEST_F(MindDataTestRandomVerticalFlipWithBBoxOp, TestOp) { + MS_LOG(INFO) << "Doing testRandomVerticalFlipWithBBoxOp."; + TensorTable results; + std::unique_ptr op(new RandomVerticalFlipWithBBoxOp(1)); + for (const auto &tensor_row_ : images_and_annotations_) { + TensorRow output_row; + Status s = op->Compute(tensor_row_, &output_row); + EXPECT_TRUE(s.IsOk()); + results.push_back(output_row); + } + if (kSaveExpected) { + SaveImagesWithAnnotations(FileType::kExpected, std::string(kOpName), results); + } + SaveImagesWithAnnotations(FileType::kActual, std::string(kOpName), results); + if (!kSaveExpected) { + CompareActualAndExpected(std::string(kOpName)); + } + MS_LOG(INFO) << "testRandomVerticalFlipWithBBoxOp end."; +} diff --git a/tests/ut/cpp/dataset/rename_op_test.cc b/tests/ut/cpp/dataset/rename_op_test.cc index b6849ec53ef..ac64346c26e 100644 --- a/tests/ut/cpp/dataset/rename_op_test.cc +++ b/tests/ut/cpp/dataset/rename_op_test.cc @@ -17,15 +17,15 @@ #include #include #include -#include "dataset/core/client.h" -#include "dataset/core/constants.h" -#include "dataset/engine/datasetops/map_op.h" -#include "dataset/engine/datasetops/rename_op.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/engine/datasetops/map_op.h" +#include "minddata/dataset/engine/datasetops/rename_op.h" #include "common/common.h" #include "common/utils.h" 
-#include "dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/data_buffer.h" #include "gtest/gtest.h" -#include "dataset/core/global_context.h" +#include "minddata/dataset/core/global_context.h" #include "utils/log_adapter.h" namespace common = mindspore::common; @@ -51,7 +51,7 @@ TEST_F(MindDataTestRenameOp, TestRenameOpDefault) { auto my_tree = std::make_shared(); // Creating TFReaderOp - std::string dataset_path = datasets_root_path_ + "/test_tf_file_3_images_1/train-0000-of-0001.data"; + std::string dataset_path = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data"; std::shared_ptr my_tfreader_op; rc = TFReaderOp::Builder() .SetDatasetFilesList({dataset_path}) diff --git a/tests/ut/cpp/dataset/repeat_op_test.cc b/tests/ut/cpp/dataset/repeat_op_test.cc index 42549546ba3..74d494c0dce 100644 --- a/tests/ut/cpp/dataset/repeat_op_test.cc +++ b/tests/ut/cpp/dataset/repeat_op_test.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/util/circular_pool.h" -#include "dataset/core/client.h" +#include "minddata/dataset/util/circular_pool.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/rescale_op_test.cc b/tests/ut/cpp/dataset/rescale_op_test.cc index 86abbe972ee..5d9bf32a9f9 100644 --- a/tests/ut/cpp/dataset/rescale_op_test.cc +++ b/tests/ut/cpp/dataset/rescale_op_test.cc @@ -15,7 +15,7 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/rescale_op.h" +#include "minddata/dataset/kernels/image/rescale_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/resize_bilinear_op_test.cc b/tests/ut/cpp/dataset/resize_bilinear_op_test.cc index 8642484149c..910c8af2a2b 100644 --- a/tests/ut/cpp/dataset/resize_bilinear_op_test.cc +++ b/tests/ut/cpp/dataset/resize_bilinear_op_test.cc @@ -15,7 +15,7 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/resize_bilinear_op.h" +#include "minddata/dataset/kernels/image/resize_bilinear_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/resize_op_test.cc b/tests/ut/cpp/dataset/resize_op_test.cc index e23320a65ad..807668dde41 100644 --- a/tests/ut/cpp/dataset/resize_op_test.cc +++ b/tests/ut/cpp/dataset/resize_op_test.cc @@ -15,7 +15,7 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/resize_op.h" +#include "minddata/dataset/kernels/image/resize_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/resize_with_bbox_op_test.cc b/tests/ut/cpp/dataset/resize_with_bbox_op_test.cc new file mode 100644 index 00000000000..f9eaf85a550 --- /dev/null +++ b/tests/ut/cpp/dataset/resize_with_bbox_op_test.cc @@ -0,0 +1,54 @@ +/** + * Copyright 2020 
Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "common/bboxop_common.h" +#include "minddata/dataset/kernels/image/resize_with_bbox_op.h" +#include "utils/log_adapter.h" + +using namespace mindspore::dataset; +using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::INFO; + +const bool kSaveExpected = false; +const char kOpName[] = "ResizeWithBBox_C"; + +class MindDataTestResizeWithBBoxOp : public UT::CVOP::BBOXOP::BBoxOpCommon { + protected: + MindDataTestResizeWithBBoxOp() : BBoxOpCommon() {} +}; +TEST_F(MindDataTestResizeWithBBoxOp, TestOp) { + MS_LOG(INFO) << "Doing testResizeWithBBox."; + // resize + TensorTable results; + std::unique_ptr op(new ResizeWithBBoxOp(500)); + for (const auto &tensor_row_ : images_and_annotations_) { + // selected a tensorRow + TensorRow output_row; + Status s = op->Compute(tensor_row_, &output_row); + EXPECT_TRUE(s.IsOk()); + results.push_back(output_row); + } + if (kSaveExpected) { + SaveImagesWithAnnotations(FileType::kExpected, std::string(kOpName), results); + } + SaveImagesWithAnnotations(FileType::kActual, std::string(kOpName), results); + if (!kSaveExpected) { + CompareActualAndExpected(std::string(kOpName)); + } + + MS_LOG(INFO) << "testResizeWithBBox end."; +} diff --git a/tests/ut/cpp/dataset/schema_test.cc b/tests/ut/cpp/dataset/schema_test.cc index 2da61bc047b..95b9c75d9e3 100644 --- 
a/tests/ut/cpp/dataset/schema_test.cc +++ b/tests/ut/cpp/dataset/schema_test.cc @@ -19,11 +19,11 @@ #include #include "common/common.h" #include "common/utils.h" -#include "dataset/core/client.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/data_schema.h" -#include "dataset/util/path.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/status.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" #include "securec.h" diff --git a/tests/ut/cpp/dataset/shuffle_op_test.cc b/tests/ut/cpp/dataset/shuffle_op_test.cc index c9bcb24c4e6..98b4878efb8 100644 --- a/tests/ut/cpp/dataset/shuffle_op_test.cc +++ b/tests/ut/cpp/dataset/shuffle_op_test.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "common/utils.h" #include "gtest/gtest.h" diff --git a/tests/ut/cpp/dataset/skip_op_test.cc b/tests/ut/cpp/dataset/skip_op_test.cc index 697745512d3..387d2f69ffd 100644 --- a/tests/ut/cpp/dataset/skip_op_test.cc +++ b/tests/ut/cpp/dataset/skip_op_test.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/util/circular_pool.h" -#include "dataset/core/client.h" +#include "minddata/dataset/util/circular_pool.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/stand_alone_samplers_test.cc b/tests/ut/cpp/dataset/stand_alone_samplers_test.cc index dfe15a8f153..96e9652bbc5 100644 --- a/tests/ut/cpp/dataset/stand_alone_samplers_test.cc +++ b/tests/ut/cpp/dataset/stand_alone_samplers_test.cc @@ -15,13 +15,13 @@ */ #include "common/common.h" -#include "dataset/core/client.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" -#include "dataset/engine/datasetops/source/sampler/random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/util/status.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" #include "securec.h" diff --git a/tests/ut/cpp/dataset/status_test.cc b/tests/ut/cpp/dataset/status_test.cc index c64a86b8bae..195da1c1199 100644 --- a/tests/ut/cpp/dataset/status_test.cc +++ b/tests/ut/cpp/dataset/status_test.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/util/status.h" +#include "minddata/dataset/util/status.h" #include "common/common.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/subset_random_sampler_test.cc b/tests/ut/cpp/dataset/subset_random_sampler_test.cc index 22200ccbac9..c389686014e 100644 --- a/tests/ut/cpp/dataset/subset_random_sampler_test.cc +++ b/tests/ut/cpp/dataset/subset_random_sampler_test.cc @@ -16,11 +16,11 @@ #include "common/common.h" #include "gtest/gtest.h" -#include "dataset/core/constants.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" #include #include diff --git a/tests/ut/cpp/dataset/take_op_test.cc b/tests/ut/cpp/dataset/take_op_test.cc index b7be066d6cb..a8bfe40b105 100644 --- a/tests/ut/cpp/dataset/take_op_test.cc +++ b/tests/ut/cpp/dataset/take_op_test.cc @@ -19,7 +19,7 @@ #include "common/common.h" #include "common/utils.h" -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/task_manager_test.cc b/tests/ut/cpp/dataset/task_manager_test.cc index 3d34ec9ec5d..7b8101fa566 100644 --- a/tests/ut/cpp/dataset/task_manager_test.cc +++ b/tests/ut/cpp/dataset/task_manager_test.cc @@ -16,7 +16,7 @@ #include "common/common.h" #include "gtest/gtest.h" -#include "dataset/util/task_manager.h" +#include "minddata/dataset/util/task_manager.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/tensor_op_fusion_pass_test.cc 
b/tests/ut/cpp/dataset/tensor_op_fusion_pass_test.cc new file mode 100644 index 00000000000..70832c04b5a --- /dev/null +++ b/tests/ut/cpp/dataset/tensor_op_fusion_pass_test.cc @@ -0,0 +1,105 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "minddata/dataset/core/client.h" +#include "common/common.h" +#include "gtest/gtest.h" +#include "minddata/dataset/kernels/image/random_crop_and_resize_op.h" +#include "minddata/dataset/kernels/image/decode_op.h" +#include "minddata/dataset/engine/datasetops/source/image_folder_op.h" +#include "minddata/dataset/engine/execution_tree.h" + + +using namespace mindspore::dataset; +using mindspore::LogStream; +using mindspore::MsLogLevel::INFO; + +class MindDataTestTensorOpFusionPass : public UT::DatasetOpTesting { + public: + MindDataTestTensorOpFusionPass() = default; + void SetUp() override { GlobalInit(); } +}; + +TEST_F(MindDataTestTensorOpFusionPass, RandomCropDecodeResize_fusion_disabled) { + MS_LOG(INFO) << "Doing RandomCropDecodeResize_fusion"; + std::shared_ptr ImageFolder(int64_t num_works, int64_t rows, int64_t conns, std::string path, + bool shuf = false, std::shared_ptr sampler = nullptr, + std::map map = {}, bool decode = false); + std::shared_ptr Build(std::vector> ops); + auto rcar_op = std::make_shared(); + auto decode_op = std::make_shared(); + Status rc; + std::vector> func_list; + func_list.push_back(decode_op); + 
func_list.push_back(rcar_op); + std::shared_ptr map_op; + MapOp::Builder map_decode_builder; + map_decode_builder.SetInColNames({}).SetOutColNames({}).SetTensorFuncs(func_list).SetNumWorkers(4); + rc = map_decode_builder.Build(&map_op); + EXPECT_TRUE(rc.IsOk()); + auto tree = std::make_shared(); + tree = Build({ImageFolder(16, 2, 32, "./", false), map_op}); + rc = tree->SetOptimize(false); + EXPECT_TRUE(rc); + rc = tree->Prepare(); + EXPECT_TRUE(rc.IsOk()); + rc = tree->SetOptimize(false); + EXPECT_TRUE(rc.IsError()); + auto it = tree->begin(); + ++it; + auto *m_op = &(*it); + auto tfuncs = static_cast(m_op)->TFuncs(); + auto func_it = tfuncs.begin(); + EXPECT_EQ((*func_it)->Name(), kDecodeOp); + ++func_it; + EXPECT_EQ((*func_it)->Name(), kRandomCropAndResizeOp); +} + +TEST_F(MindDataTestTensorOpFusionPass, RandomCropDecodeResize_fusion_enabled) { + MS_LOG(INFO) << "Doing RandomCropDecodeResize_fusion"; + std::shared_ptr ImageFolder(int64_t num_works, int64_t rows, int64_t conns, std::string path, + bool shuf = false, std::shared_ptr sampler = nullptr, + std::map map = {}, bool decode = false); + std::shared_ptr Build(std::vector> ops); + auto rcar_op = std::make_shared(); + auto decode_op = std::make_shared(); + Status rc; + std::vector> func_list; + func_list.push_back(decode_op); + func_list.push_back(rcar_op); + std::shared_ptr map_op; + MapOp::Builder map_decode_builder; + map_decode_builder.SetInColNames({}).SetOutColNames({}).SetTensorFuncs(func_list).SetNumWorkers(4); + rc = map_decode_builder.Build(&map_op); + EXPECT_TRUE(rc.IsOk()); + auto tree = std::make_shared(); + tree = Build({ImageFolder(16, 2, 32, "./", false), map_op}); + rc = tree->SetOptimize(true); + EXPECT_TRUE(rc); + rc = tree->Prepare(); + EXPECT_TRUE(rc.IsOk()); + rc = tree->SetOptimize(false); + EXPECT_TRUE(rc.IsError()); + auto it = tree->begin(); + ++it; + auto *m_op = &(*it); + auto tfuncs = static_cast(m_op)->TFuncs(); + auto func_it = tfuncs.begin(); + EXPECT_EQ((*func_it)->Name(), 
kRandomCropDecodeResizeOp); + EXPECT_EQ(++func_it, tfuncs.end()); +} \ No newline at end of file diff --git a/tests/ut/cpp/dataset/tensor_string_test.cc b/tests/ut/cpp/dataset/tensor_string_test.cc index 43b235304da..fe336a34c52 100644 --- a/tests/ut/cpp/dataset/tensor_string_test.cc +++ b/tests/ut/cpp/dataset/tensor_string_test.cc @@ -15,13 +15,13 @@ */ #include #include -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "gtest/gtest.h" #include "securec.h" -#include "dataset/core/tensor.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/core/data_type.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/tensor_test.cc b/tests/ut/cpp/dataset/tensor_test.cc index 1aa3cad2fa1..fce4652b47a 100644 --- a/tests/ut/cpp/dataset/tensor_test.cc +++ b/tests/ut/cpp/dataset/tensor_test.cc @@ -15,13 +15,13 @@ */ #include #include -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "gtest/gtest.h" #include "securec.h" -#include "dataset/core/tensor.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/core/data_type.h" using namespace mindspore::dataset; @@ -432,3 +432,17 @@ TEST_F(MindDataTestTensorDE, TensorConcatenate) { s = t1->Concatenate({5}, t2); EXPECT_FALSE(s.IsOk()); } + +TEST_F(MindDataTestTensorDE, TensorEmpty) { + std::shared_ptr t = std::make_shared(TensorShape({2, 3}), DataType(DataType::DE_UINT64)); + ASSERT_TRUE(t->HasData()); +} + +TEST_F(MindDataTestTensorDE, TensorEmptyInvalidate) { + std::vector values1 = {1, 2, 3, 0, 0, 0}; + std::shared_ptr t; + Tensor::CreateTensor(&t, values1); + t->Invalidate(); + ASSERT_TRUE(t->HasData()); +} + diff 
--git a/tests/ut/cpp/dataset/tensorshape_test.cc b/tests/ut/cpp/dataset/tensorshape_test.cc index 1af0bf9c823..65ab386db0e 100644 --- a/tests/ut/cpp/dataset/tensorshape_test.cc +++ b/tests/ut/cpp/dataset/tensorshape_test.cc @@ -15,10 +15,10 @@ */ #include #include "./securec.h" -#include "dataset/core/client.h" -#include "dataset/core/data_type.h" -#include "dataset/core/tensor_shape.h" -#include "dataset/engine/data_schema.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/data_schema.h" #include "common/common.h" #include "common/utils.h" #include "gtest/gtest.h" diff --git a/tests/ut/cpp/dataset/text_file_op_test.cc b/tests/ut/cpp/dataset/text_file_op_test.cc index 7887eda9552..bc2674a6a30 100644 --- a/tests/ut/cpp/dataset/text_file_op_test.cc +++ b/tests/ut/cpp/dataset/text_file_op_test.cc @@ -17,13 +17,13 @@ #include #include -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "common/utils.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "dataset/engine/datasetops/source/text_file_op.h" -#include "dataset/util/status.h" +#include "minddata/dataset/engine/datasetops/source/text_file_op.h" +#include "minddata/dataset/util/status.h" namespace common = mindspore::common; diff --git a/tests/ut/cpp/dataset/tfReader_op_test.cc b/tests/ut/cpp/dataset/tfReader_op_test.cc index 9b312296d86..30fde33ff9c 100644 --- a/tests/ut/cpp/dataset/tfReader_op_test.cc +++ b/tests/ut/cpp/dataset/tfReader_op_test.cc @@ -17,8 +17,8 @@ #include #include -#include "dataset/core/client.h" -#include "dataset/engine/data_schema.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/engine/data_schema.h" #include "common/common.h" #include "common/utils.h" #include "gtest/gtest.h" diff --git a/tests/ut/cpp/dataset/to_float16_op_test.cc 
b/tests/ut/cpp/dataset/to_float16_op_test.cc index 9c49c67b2cf..5c886690c91 100644 --- a/tests/ut/cpp/dataset/to_float16_op_test.cc +++ b/tests/ut/cpp/dataset/to_float16_op_test.cc @@ -15,9 +15,9 @@ */ #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/image/random_rotation_op.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/kernels/data/to_float16_op.h" +#include "minddata/dataset/kernels/image/random_rotation_op.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/kernels/data/to_float16_op.h" #include "utils/log_adapter.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/tokenizer_op_test.cc b/tests/ut/cpp/dataset/tokenizer_op_test.cc index 8a18f0da0cf..cc2d7473ff8 100644 --- a/tests/ut/cpp/dataset/tokenizer_op_test.cc +++ b/tests/ut/cpp/dataset/tokenizer_op_test.cc @@ -18,14 +18,14 @@ #include #include "common/common.h" -#include "dataset/text/kernels/basic_tokenizer_op.h" -#include "dataset/text/kernels/case_fold_op.h" -#include "dataset/text/kernels/normalize_utf8_op.h" -#include "dataset/text/kernels/regex_replace_op.h" -#include "dataset/text/kernels/regex_tokenizer_op.h" -#include "dataset/text/kernels/unicode_char_tokenizer_op.h" -#include "dataset/text/kernels/unicode_script_tokenizer_op.h" -#include "dataset/text/kernels/whitespace_tokenizer_op.h" +#include "minddata/dataset/text/kernels/basic_tokenizer_op.h" +#include "minddata/dataset/text/kernels/case_fold_op.h" +#include "minddata/dataset/text/kernels/normalize_utf8_op.h" +#include "minddata/dataset/text/kernels/regex_replace_op.h" +#include "minddata/dataset/text/kernels/regex_tokenizer_op.h" +#include "minddata/dataset/text/kernels/unicode_char_tokenizer_op.h" +#include "minddata/dataset/text/kernels/unicode_script_tokenizer_op.h" +#include "minddata/dataset/text/kernels/whitespace_tokenizer_op.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" @@ -45,227 +45,245 @@ class MindDataTestTokenizerOp : 
public UT::Common { TEST_F(MindDataTestTokenizerOp, TestUnicodeCharTokenizerOp) { MS_LOG(INFO) << "Doing TestUnicodeCharTokenizerOp."; - std::unique_ptr op(new UnicodeCharTokenizerOp()); + std::unique_ptr op(new UnicodeCharTokenizerOp(true)); std::shared_ptr input = std::make_shared("Hello World!"); - std::shared_ptr output; - Status s = op->Compute(input, &output); + TensorRow output; + Status s = op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 12); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor1: " << output->ToString(); - CheckEqual(output, {0}, "H"); - CheckEqual(output, {1}, "e"); - CheckEqual(output, {2}, "l"); - CheckEqual(output, {3}, "l"); - CheckEqual(output, {4}, "o"); - CheckEqual(output, {5}, " "); - CheckEqual(output, {6}, "W"); - CheckEqual(output, {7}, "o"); - CheckEqual(output, {8}, "r"); - CheckEqual(output, {9}, "l"); - CheckEqual(output, {10}, "d"); - CheckEqual(output, {11}, "!"); + EXPECT_EQ(output[0]->Size(), 12); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor1: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "H"); + CheckEqual(output[0], {1}, "e"); + CheckEqual(output[0], {2}, "l"); + CheckEqual(output[0], {3}, "l"); + CheckEqual(output[0], {4}, "o"); + CheckEqual(output[0], {5}, " "); + CheckEqual(output[0], {6}, "W"); + CheckEqual(output[0], {7}, "o"); + CheckEqual(output[0], {8}, "r"); + CheckEqual(output[0], {9}, "l"); + CheckEqual(output[0], {10}, "d"); + CheckEqual(output[0], {11}, "!"); input = std::make_shared("中国 你好!"); - s = op->Compute(input, &output); + output.clear(); + s = op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 6); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor2: " << output->ToString(); - CheckEqual(output, {0}, "中"); - CheckEqual(output, {1}, "国"); - CheckEqual(output, {2}, " "); - CheckEqual(output, {3}, "你"); - CheckEqual(output, {4}, "好"); - CheckEqual(output, 
{5}, "!"); + EXPECT_EQ(output[0]->Size(), 6); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor2: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "中"); + CheckEqual(output[0], {1}, "国"); + CheckEqual(output[0], {2}, " "); + CheckEqual(output[0], {3}, "你"); + CheckEqual(output[0], {4}, "好"); + CheckEqual(output[0], {5}, "!"); input = std::make_shared("中"); - s = op->Compute(input, &output); + output.clear(); + s = op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor3: " << output->ToString(); - CheckEqual(output, {0}, "中"); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor3: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "中"); input = std::make_shared("H"); - s = op->Compute(input, &output); + output.clear(); + s = op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor4: " << output->ToString(); - CheckEqual(output, {0}, "H"); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor4: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "H"); input = std::make_shared(" "); - s = op->Compute(input, &output); + output.clear(); + s = op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 2); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor5: " << output->ToString(); - CheckEqual(output, {0}, " "); - CheckEqual(output, {1}, " "); + EXPECT_EQ(output[0]->Size(), 2); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor5: " << output[0]->ToString(); + CheckEqual(output[0], {0}, " "); + CheckEqual(output[0], {1}, " "); input = std::make_shared(""); - s = op->Compute(input, &output); + output.clear(); + s = op->Compute(TensorRow(0, {input}), &output); 
EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor6: " << output->ToString(); - CheckEqual(output, {0}, ""); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor6: " << output[0]->ToString(); + CheckEqual(output[0], {0}, ""); } TEST_F(MindDataTestTokenizerOp, TestWhitespaceTokenizerOp) { MS_LOG(INFO) << "Doing TestWhitespaceTokenizerOp."; - std::unique_ptr op(new WhitespaceTokenizerOp()); + std::unique_ptr op(new WhitespaceTokenizerOp(true)); std::shared_ptr input = std::make_shared("Welcome to China."); - std::shared_ptr output; - Status s = op->Compute(input, &output); + TensorRow output; + Status s = op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 3); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor1: " << output->ToString(); - CheckEqual(output, {0}, "Welcome"); - CheckEqual(output, {1}, "to"); - CheckEqual(output, {2}, "China."); + EXPECT_EQ(output[0]->Size(), 3); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor1: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "Welcome"); + CheckEqual(output[0], {1}, "to"); + CheckEqual(output[0], {2}, "China."); input = std::make_shared(" hello"); - s = op->Compute(input, &output); + output.clear(); + s = op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor2: " << output->ToString(); - CheckEqual(output, {0}, "hello"); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor2: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "hello"); input = std::make_shared("hello"); - s = op->Compute(input, &output); + output.clear(); + s = op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) 
<< "Out tensor3: " << output->ToString(); - CheckEqual(output, {0}, "hello"); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor3: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "hello"); input = std::make_shared("hello "); - s = op->Compute(input, &output); + output.clear(); + s = op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor4: " << output->ToString(); - CheckEqual(output, {0}, "hello"); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor4: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "hello"); input = std::make_shared(" "); - s = op->Compute(input, &output); + output.clear(); + s = op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor5: " << output->ToString(); - CheckEqual(output, {0}, ""); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor5: " << output[0]->ToString(); + CheckEqual(output[0], {0}, ""); } TEST_F(MindDataTestTokenizerOp, TestUnicodeScriptTokenizer) { MS_LOG(INFO) << "Doing TestUnicodeScriptTokenizer."; - std::unique_ptr keep_whitespace_op(new UnicodeScriptTokenizerOp(true)); - std::unique_ptr skip_whitespace_op(new UnicodeScriptTokenizerOp(false)); + std::unique_ptr keep_whitespace_op(new UnicodeScriptTokenizerOp(true, true)); + std::unique_ptr skip_whitespace_op(new UnicodeScriptTokenizerOp(false, true)); std::shared_ptr input = std::make_shared("Welcome to China. 
\n 中国\t北京"); - std::shared_ptr output; - Status s = keep_whitespace_op->Compute(input, &output); + TensorRow output; + Status s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 10); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor1: " << output->ToString(); - CheckEqual(output, {0}, "Welcome"); - CheckEqual(output, {1}, " "); - CheckEqual(output, {2}, "to"); - CheckEqual(output, {3}, " "); - CheckEqual(output, {4}, "China"); - CheckEqual(output, {5}, "."); - CheckEqual(output, {6}, " \n "); - CheckEqual(output, {7}, "中国"); - CheckEqual(output, {8}, "\t"); - CheckEqual(output, {9}, "北京"); - s = skip_whitespace_op->Compute(input, &output); + EXPECT_EQ(output[0]->Size(), 10); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor1: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "Welcome"); + CheckEqual(output[0], {1}, " "); + CheckEqual(output[0], {2}, "to"); + CheckEqual(output[0], {3}, " "); + CheckEqual(output[0], {4}, "China"); + CheckEqual(output[0], {5}, "."); + CheckEqual(output[0], {6}, " \n "); + CheckEqual(output[0], {7}, "中国"); + CheckEqual(output[0], {8}, "\t"); + CheckEqual(output[0], {9}, "北京"); + output.clear(); + s = skip_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 6); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor2: " << output->ToString(); - CheckEqual(output, {0}, "Welcome"); - CheckEqual(output, {1}, "to"); - CheckEqual(output, {2}, "China"); - CheckEqual(output, {3}, "."); - CheckEqual(output, {4}, "中国"); - CheckEqual(output, {5}, "北京"); + EXPECT_EQ(output[0]->Size(), 6); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor2: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "Welcome"); + CheckEqual(output[0], {1}, "to"); + CheckEqual(output[0], {2}, "China"); + CheckEqual(output[0], {3}, "."); + CheckEqual(output[0], {4}, "中国"); + CheckEqual(output[0], 
{5}, "北京"); input = std::make_shared(" Welcome to 中国. "); - s = skip_whitespace_op->Compute(input, &output); + output.clear(); + s = skip_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 4); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor3: " << output->ToString(); - CheckEqual(output, {0}, "Welcome"); - CheckEqual(output, {1}, "to"); - CheckEqual(output, {2}, "中国"); - CheckEqual(output, {3}, "."); - s = keep_whitespace_op->Compute(input, &output); + EXPECT_EQ(output[0]->Size(), 4); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor3: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "Welcome"); + CheckEqual(output[0], {1}, "to"); + CheckEqual(output[0], {2}, "中国"); + CheckEqual(output[0], {3}, "."); + output.clear(); + s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 8); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor4: " << output->ToString(); - CheckEqual(output, {0}, " "); - CheckEqual(output, {1}, "Welcome"); - CheckEqual(output, {2}, " "); - CheckEqual(output, {3}, "to"); - CheckEqual(output, {4}, " "); - CheckEqual(output, {5}, "中国"); - CheckEqual(output, {6}, "."); - CheckEqual(output, {7}, " "); + EXPECT_EQ(output[0]->Size(), 8); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor4: " << output[0]->ToString(); + CheckEqual(output[0], {0}, " "); + CheckEqual(output[0], {1}, "Welcome"); + CheckEqual(output[0], {2}, " "); + CheckEqual(output[0], {3}, "to"); + CheckEqual(output[0], {4}, " "); + CheckEqual(output[0], {5}, "中国"); + CheckEqual(output[0], {6}, "."); + CheckEqual(output[0], {7}, " "); input = std::make_shared("Hello"); - s = keep_whitespace_op->Compute(input, &output); + output.clear(); + s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out 
tensor5: " << output->ToString(); - CheckEqual(output, {0}, "Hello"); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor5: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "Hello"); input = std::make_shared("H"); - s = keep_whitespace_op->Compute(input, &output); + output.clear(); + s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor6: " << output->ToString(); - CheckEqual(output, {0}, "H"); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor6: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "H"); input = std::make_shared(""); - s = keep_whitespace_op->Compute(input, &output); + output.clear(); + s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor7: " << output->ToString(); - CheckEqual(output, {0}, ""); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor7: " << output[0]->ToString(); + CheckEqual(output[0], {0}, ""); input = std::make_shared("Hello中国Hello世界"); - s = keep_whitespace_op->Compute(input, &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 4); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor8: " << output->ToString(); - CheckEqual(output, {0}, "Hello"); - CheckEqual(output, {1}, "中国"); - CheckEqual(output, {2}, "Hello"); - CheckEqual(output, {3}, "世界"); + output.clear(); + s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); + EXPECT_EQ(output[0]->Size(), 4); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor8: " << output[0]->ToString(); + CheckEqual(output[0], {0}, "Hello"); + CheckEqual(output[0], {1}, "中国"); + CheckEqual(output[0], {2}, "Hello"); + 
CheckEqual(output[0], {3}, "世界"); input = std::make_shared(" "); - s = keep_whitespace_op->Compute(input, &output); + output.clear(); + s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor10: " << output->ToString(); - CheckEqual(output, {0}, " "); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor10: " << output[0]->ToString(); + CheckEqual(output[0], {0}, " "); input = std::make_shared(" "); - s = skip_whitespace_op->Compute(input, &output); + output.clear(); + s = skip_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); - EXPECT_EQ(output->Size(), 1); - EXPECT_EQ(output->Rank(), 1); - MS_LOG(INFO) << "Out tensor11: " << output->ToString(); - CheckEqual(output, {0}, ""); + EXPECT_EQ(output[0]->Size(), 1); + EXPECT_EQ(output[0]->Rank(), 1); + MS_LOG(INFO) << "Out tensor11: " << output[0]->ToString(); + CheckEqual(output[0], {0}, ""); } TEST_F(MindDataTestTokenizerOp, TestCaseFold) { @@ -321,10 +339,10 @@ TEST_F(MindDataTestTokenizerOp, TestRegexReplace) { TEST_F(MindDataTestTokenizerOp, TestRegexTokenizer) { MS_LOG(INFO) << "Doing TestRegexTokenizerOp."; - std::unique_ptr regex_tokenizer_op(new RegexTokenizerOp("\\p{Cc}|\\p{Cf}|\\s+", "")); + std::unique_ptr regex_tokenizer_op(new RegexTokenizerOp("\\p{Cc}|\\p{Cf}|\\s+", "", true)); std::shared_ptr input = std::make_shared("Welcome to China. 
\n 中国\t北京"); - std::shared_ptr output; - Status s = regex_tokenizer_op->Compute(input, &output); + TensorRow output; + Status s = regex_tokenizer_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); } @@ -332,9 +350,10 @@ TEST_F(MindDataTestTokenizerOp, TestBasicTokenizer) { MS_LOG(INFO) << "Doing TestBasicTokenizer."; //bool lower_case, bool keep_whitespace, // NormalizeForm normalization_form, bool preserve_unused_token - std::unique_ptr basic_tokenizer(new BasicTokenizerOp(true, true, NormalizeForm::kNone, false)); + std::unique_ptr basic_tokenizer(new BasicTokenizerOp(true, true, NormalizeForm::kNone, false, + true)); std::shared_ptr input = std::make_shared("Welcome to China. 中国\t北京"); - std::shared_ptr output; - Status s = basic_tokenizer->Compute(input, &output); + TensorRow output; + Status s = basic_tokenizer->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); } \ No newline at end of file diff --git a/tests/ut/cpp/dataset/treap_test.cc b/tests/ut/cpp/dataset/treap_test.cc index b454ab108eb..b9c534719c8 100644 --- a/tests/ut/cpp/dataset/treap_test.cc +++ b/tests/ut/cpp/dataset/treap_test.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "dataset/util/treap.h" +#include "minddata/dataset/util/treap.h" #include "common/common.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/dataset/trucate_pair_test.cc b/tests/ut/cpp/dataset/trucate_pair_test.cc index 95e2aaa11b3..af7e61c16aa 100644 --- a/tests/ut/cpp/dataset/trucate_pair_test.cc +++ b/tests/ut/cpp/dataset/trucate_pair_test.cc @@ -15,12 +15,12 @@ */ #include #include -#include "dataset/core/client.h" +#include "minddata/dataset/core/client.h" #include "common/common.h" #include "gtest/gtest.h" #include "securec.h" -#include "dataset/core/tensor.h" -#include "mindspore/ccsrc/dataset/text/kernels/truncate_sequence_pair_op.h" +#include "minddata/dataset/core/tensor.h" +#include "mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.h" using namespace mindspore::dataset; diff --git a/tests/ut/cpp/dataset/type_cast_op_test.cc b/tests/ut/cpp/dataset/type_cast_op_test.cc index 543eb71637f..a94a7fedbab 100644 --- a/tests/ut/cpp/dataset/type_cast_op_test.cc +++ b/tests/ut/cpp/dataset/type_cast_op_test.cc @@ -17,12 +17,12 @@ #include #include "common/common.h" #include "common/cvop_common.h" -#include "dataset/kernels/data/type_cast_op.h" -#include "dataset/core/client.h" -#include "dataset/core/cv_tensor.h" -#include "dataset/core/data_type.h" -#include "dataset/core/tensor.h" -#include "dataset/core/pybind_support.h" +#include "minddata/dataset/kernels/data/type_cast_op.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/cv_tensor.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/pybind_support.h" #include "gtest/gtest.h" #include "securec.h" diff --git a/tests/ut/cpp/dataset/voc_op_test.cc b/tests/ut/cpp/dataset/voc_op_test.cc index 05dc28b4875..4bb212ffc79 100644 --- a/tests/ut/cpp/dataset/voc_op_test.cc +++ b/tests/ut/cpp/dataset/voc_op_test.cc @@ -20,18 +20,18 @@ #include 
"common/common.h" #include "common/utils.h" -#include "dataset/core/client.h" -#include "dataset/core/global_context.h" -#include "dataset/engine/datasetops/source/voc_op.h" -#include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" -#include "dataset/engine/datasetops/source/sampler/pk_sampler.h" -#include "dataset/engine/datasetops/source/sampler/random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" -#include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" -#include "dataset/util/path.h" -#include "dataset/util/status.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/datasetops/source/voc_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/status.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" #include "securec.h" diff --git a/tests/ut/cpp/dataset/weighted_random_sampler_test.cc b/tests/ut/cpp/dataset/weighted_random_sampler_test.cc index d146ed10ac2..bb3079aec80 100644 --- a/tests/ut/cpp/dataset/weighted_random_sampler_test.cc +++ b/tests/ut/cpp/dataset/weighted_random_sampler_test.cc @@ -16,11 +16,11 @@ #include "common/common.h" #include "gtest/gtest.h" -#include 
"dataset/core/constants.h" -#include "dataset/core/tensor.h" -#include "dataset/engine/data_buffer.h" -#include "dataset/engine/datasetops/source/sampler/sampler.h" -#include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" #include "utils/log_adapter.h" #include diff --git a/tests/ut/cpp/dataset/zip_op_test.cc b/tests/ut/cpp/dataset/zip_op_test.cc index b3873413986..8d74cb09690 100644 --- a/tests/ut/cpp/dataset/zip_op_test.cc +++ b/tests/ut/cpp/dataset/zip_op_test.cc @@ -21,17 +21,17 @@ #include #include #include -#include "dataset/core/client.h" -#include "dataset/core/constants.h" -#include "dataset/engine/datasetops/map_op.h" -#include "dataset/engine/datasetops/zip_op.h" -#include "dataset/core/tensor.h" -#include "dataset/core/config_manager.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/engine/datasetops/map_op.h" +#include "minddata/dataset/engine/datasetops/zip_op.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/core/config_manager.h" #include "common/common.h" #include "common/utils.h" -#include "dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/data_buffer.h" #include "gtest/gtest.h" -#include "dataset/core/global_context.h" +#include "minddata/dataset/core/global_context.h" #include "utils/log_adapter.h" namespace common = mindspore::common; @@ -58,7 +58,7 @@ TEST_F(MindDataTestZipOp, MindDataTestZipOpDefault) { auto my_tree = std::make_shared(); // Creating TFReaderOp - std::string dataset_path = datasets_root_path_ + "/test_tf_file_3_images_1/train-0000-of-0001.data"; + std::string dataset_path = datasets_root_path_ + 
"/test_tf_file_3_images/train-0000-of-0001.data"; std::string dataset_path2 = datasets_root_path_ + "/testBatchDataset/test.data"; std::shared_ptr my_tfreader_op; rc = TFReaderOp::Builder() @@ -142,7 +142,7 @@ TEST_F(MindDataTestZipOp, MindDataTestZipOpRepeat) { MS_LOG(INFO) << "UT test TestZipRepeat."; auto my_tree = std::make_shared(); - std::string dataset_path = datasets_root_path_ + "/test_tf_file_3_images_1/train-0000-of-0001.data"; + std::string dataset_path = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data"; std::string dataset_path2 = datasets_root_path_ + "/testBatchDataset/test.data"; std::shared_ptr my_tfreader_op; rc = TFReaderOp::Builder() diff --git a/tests/ut/cpp/device/ascend_kernel_runtime_test.cc b/tests/ut/cpp/device/ascend_kernel_runtime_test.cc index effa0b212da..2aa95128088 100644 --- a/tests/ut/cpp/device/ascend_kernel_runtime_test.cc +++ b/tests/ut/cpp/device/ascend_kernel_runtime_test.cc @@ -18,7 +18,7 @@ #include "common/common_test.h" -#include "device/kernel_runtime.h" +#include "runtime/device/kernel_runtime.h" #include "./common.h" namespace mindspore { diff --git a/tests/ut/cpp/device/ascend_profiling_test.cc b/tests/ut/cpp/device/ascend_profiling_test.cc index 2829a5fd4ae..f862d84c4a0 100644 --- a/tests/ut/cpp/device/ascend_profiling_test.cc +++ b/tests/ut/cpp/device/ascend_profiling_test.cc @@ -18,12 +18,12 @@ #include "./prof_reporter.h" #include "common/common_test.h" -#include "device/ascend/profiling/profiling_manager.h" +#include "runtime/device/ascend/profiling/profiling_manager.h" #include "./common.h" #define private public -#include "device/ascend/profiling/plugin_impl.h" +#include "runtime/device/ascend/profiling/plugin_impl.h" #undef private -#include "device/ascend/profiling/profiling_engine_impl.h" +#include "runtime/device/ascend/profiling/profiling_engine_impl.h" namespace mindspore { namespace device { diff --git a/tests/ut/cpp/ir/anf_test.cc b/tests/ut/cpp/ir/anf_test.cc index 
c649518e219..9b217a23218 100644 --- a/tests/ut/cpp/ir/anf_test.cc +++ b/tests/ut/cpp/ir/anf_test.cc @@ -19,7 +19,7 @@ #include "common/common_test.h" #include "ir/anf.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "./common.h" namespace mindspore { diff --git a/tests/ut/cpp/ir/clone_test.cc b/tests/ut/cpp/ir/clone_test.cc index bb8cae7fbb9..20da3fb8b57 100644 --- a/tests/ut/cpp/ir/clone_test.cc +++ b/tests/ut/cpp/ir/clone_test.cc @@ -21,7 +21,7 @@ #include "ir/manager.h" #include "utils/log_adapter.h" #include "ir/func_graph_cloner.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "utils/graph_utils.h" #include "debug/draw.h" #include "./common.h" diff --git a/tests/ut/cpp/ir/manager_test.cc b/tests/ut/cpp/ir/manager_test.cc index 04b584ec102..3e6d1a312c0 100644 --- a/tests/ut/cpp/ir/manager_test.cc +++ b/tests/ut/cpp/ir/manager_test.cc @@ -18,8 +18,8 @@ #include "ir/dtype.h" #include "ir/manager.h" #include "ir/func_graph_cloner.h" -#include "pipeline/parse/parse.h" -#include "operator/ops.h" +#include "pipeline/jit/parse/parse.h" +#include "frontend/operator/ops.h" #include "utils/log_adapter.h" #include "debug/draw.h" #include "debug/label.h" diff --git a/tests/ut/cpp/ir/value_test.cc b/tests/ut/cpp/ir/value_test.cc index a71ef7a57ff..b4ed5f438ef 100644 --- a/tests/ut/cpp/ir/value_test.cc +++ b/tests/ut/cpp/ir/value_test.cc @@ -21,7 +21,7 @@ #include "common/common_test.h" #include "ir/value.h" -#include "pipeline/static_analysis/abstract_value.h" +#include "abstract/abstract_value.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/tests/ut/cpp/kernel/common_utils_test.cc b/tests/ut/cpp/kernel/common_utils_test.cc index 4bc05b5c05d..83f7c59e523 100644 --- a/tests/ut/cpp/kernel/common_utils_test.cc +++ b/tests/ut/cpp/kernel/common_utils_test.cc @@ -16,7 +16,7 @@ #include #include "common/common_test.h" -#include "kernel/common_utils.h" +#include 
"backend/kernel_compiler/common_utils.h" namespace mindspore { namespace kernel { diff --git a/tests/ut/cpp/kernel/cpu/sparse_apply_adam_cpu_kernel_test.cc b/tests/ut/cpp/kernel/cpu/sparse_apply_adam_cpu_kernel_test.cc index 2a6b80f9e7e..e5cba862306 100644 --- a/tests/ut/cpp/kernel/cpu/sparse_apply_adam_cpu_kernel_test.cc +++ b/tests/ut/cpp/kernel/cpu/sparse_apply_adam_cpu_kernel_test.cc @@ -18,7 +18,7 @@ #include "common/common_test.h" #define private public #define protected public -#include "kernel/cpu/sparse_apply_adam_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.h" #undef private #undef protected @@ -58,9 +58,12 @@ class SparseApplyAdamCpuKernelTest : public UT::Common { inputs_.push_back(CreateKernelAddress(indices.data())); } - void CreateWorkspaceAddress(std::vector &new_grad, std::vector &new_indices, std::vector &m_t) { + void CreateWorkspaceAddress(std::vector &new_grad, std::vector &new_indices, std::vector &tmp_grad, + std::vector &tmp_indices, std::vector &m_t) { workspace_.push_back(CreateKernelAddress(new_grad.data())); workspace_.push_back(CreateKernelAddress(new_indices.data())); + workspace_.push_back(CreateKernelAddress(tmp_grad.data())); + workspace_.push_back(CreateKernelAddress(tmp_indices.data())); workspace_.push_back(CreateKernelAddress(m_t.data())); } @@ -95,8 +98,10 @@ TEST_F(SparseApplyAdamCpuKernelTest, dense_test) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); + std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); std::vector m_t(3 * 3 * 3); - CreateWorkspaceAddress(new_grad, new_indices, m_t); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices, m_t); sparse_adam_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3 * 3; ++i) { EXPECT_TRUE(std::fabs(var_[i] - 0.999684) < 1e-6); @@ -120,8 +125,10 @@ TEST_F(SparseApplyAdamCpuKernelTest, sparse_test1) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 
3); std::vector new_indices(3); + std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); std::vector m_t(3 * 3 * 3); - CreateWorkspaceAddress(new_grad, new_indices, m_t); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices, m_t); sparse_adam_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3; ++i) { EXPECT_TRUE(std::fabs(var_[i] - 0.999684) < 1e-6); @@ -149,8 +156,10 @@ TEST_F(SparseApplyAdamCpuKernelTest, sparse_test2) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); + std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); std::vector m_t(3 * 3 * 3); - CreateWorkspaceAddress(new_grad, new_indices, m_t); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices, m_t); sparse_adam_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3; ++i) { EXPECT_TRUE(std::fabs(var_[i] - 0.999715) < 1e-6); diff --git a/tests/ut/cpp/kernel/cpu/sparse_apply_ftrl_cpu_kernel_test.cc b/tests/ut/cpp/kernel/cpu/sparse_apply_ftrl_cpu_kernel_test.cc index c5c2394538d..230c8cbf9eb 100644 --- a/tests/ut/cpp/kernel/cpu/sparse_apply_ftrl_cpu_kernel_test.cc +++ b/tests/ut/cpp/kernel/cpu/sparse_apply_ftrl_cpu_kernel_test.cc @@ -18,7 +18,7 @@ #include "common/common_test.h" #define private public #define protected public -#include "kernel/cpu/sparse_apply_ftrl_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.h" #undef private #undef protected @@ -56,9 +56,12 @@ class SparseApplyFtrlCpuKernelTest : public UT::Common { inputs_.push_back(CreateKernelAddress(indices.data())); } - void CreateWorkspaceAddress(std::vector &new_grad, std::vector &new_indices) { + void CreateWorkspaceAddress(std::vector &new_grad, std::vector &new_indices, std::vector &tmp_grad, + std::vector &tmp_indices) { workspace_.push_back(CreateKernelAddress(new_grad.data())); workspace_.push_back(CreateKernelAddress(new_indices.data())); + 
workspace_.push_back(CreateKernelAddress(tmp_grad.data())); + workspace_.push_back(CreateKernelAddress(tmp_indices.data())); } std::vector var_; @@ -86,7 +89,9 @@ TEST_F(SparseApplyFtrlCpuKernelTest, dense_test) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); - CreateWorkspaceAddress(new_grad, new_indices); + std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices); sparse_ftrl_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3 * 3; ++i) { EXPECT_TRUE(std::fabs(var_[i] - 0.291479) < 1e-6); @@ -110,7 +115,9 @@ TEST_F(SparseApplyFtrlCpuKernelTest, sparse_test1) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); - CreateWorkspaceAddress(new_grad, new_indices); + std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices); sparse_ftrl_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3; ++i) { EXPECT_TRUE(std::fabs(var_[i] - 0.291479) < 1e-6); @@ -138,7 +145,9 @@ TEST_F(SparseApplyFtrlCpuKernelTest, sparse_test2) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); - CreateWorkspaceAddress(new_grad, new_indices); + std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices); sparse_ftrl_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3; ++i) { EXPECT_EQ(var_[i], 1.0); diff --git a/tests/ut/cpp/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel_test.cc b/tests/ut/cpp/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel_test.cc index 1765ed896f6..a829ead90ed 100644 --- a/tests/ut/cpp/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel_test.cc +++ b/tests/ut/cpp/kernel/cpu/sparse_apply_lazy_adam_cpu_kernel_test.cc @@ -18,7 +18,7 @@ #include "common/common_test.h" #define private public #define 
protected public -#include "kernel/cpu/sparse_apply_lazy_adam_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/sparse_apply_lazy_adam_cpu_kernel.h" #undef private #undef protected @@ -58,9 +58,12 @@ class SparseApplyLazyAdamCpuKernelTest : public UT::Common { inputs_.push_back(CreateKernelAddress(indices.data())); } - void CreateWorkspaceAddress(std::vector &new_grad, std::vector &new_indices) { + void CreateWorkspaceAddress(std::vector &new_grad, std::vector &new_indices, std::vector &tmp_grad, + std::vector &tmp_indices) { workspace_.push_back(CreateKernelAddress(new_grad.data())); workspace_.push_back(CreateKernelAddress(new_indices.data())); + workspace_.push_back(CreateKernelAddress(tmp_grad.data())); + workspace_.push_back(CreateKernelAddress(tmp_indices.data())); } std::vector var_; @@ -94,7 +97,9 @@ TEST_F(SparseApplyLazyAdamCpuKernelTest, dense_test) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); - CreateWorkspaceAddress(new_grad, new_indices); + std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices); sparse_lazy_adam_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3 * 3; ++i) { EXPECT_TRUE(std::fabs(var_[i] - 0.999684) < 1e-6); @@ -118,7 +123,9 @@ TEST_F(SparseApplyLazyAdamCpuKernelTest, sparse_test1) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); - CreateWorkspaceAddress(new_grad, new_indices); + std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices); sparse_lazy_adam_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3; ++i) { EXPECT_TRUE(std::fabs(var_[i] - 0.999684) < 1e-6); @@ -146,7 +153,9 @@ TEST_F(SparseApplyLazyAdamCpuKernelTest, sparse_test2) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); - 
CreateWorkspaceAddress(new_grad, new_indices); + std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices); sparse_lazy_adam_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3; ++i) { EXPECT_EQ(var_[i], 1.0); diff --git a/tests/ut/cpp/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel_test.cc b/tests/ut/cpp/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel_test.cc index 23f66db58cb..64bd5d3ef31 100644 --- a/tests/ut/cpp/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel_test.cc +++ b/tests/ut/cpp/kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel_test.cc @@ -18,7 +18,7 @@ #include "common/common_test.h" #define private public #define protected public -#include "kernel/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h" +#include "backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h" #undef private #undef protected @@ -54,9 +54,12 @@ class SparseApplyProximalAdagradCpuKernelTest : public UT::Common { inputs_.push_back(CreateKernelAddress(indices.data())); } - void CreateWorkspaceAddress(std::vector &new_grad, std::vector &new_indices) { + void CreateWorkspaceAddress(std::vector &new_grad, std::vector &new_indices, std::vector &tmp_grad, + std::vector &tmp_indices) { workspace_.push_back(CreateKernelAddress(new_grad.data())); workspace_.push_back(CreateKernelAddress(new_indices.data())); + workspace_.push_back(CreateKernelAddress(tmp_grad.data())); + workspace_.push_back(CreateKernelAddress(tmp_indices.data())); } std::vector var_; @@ -85,7 +88,9 @@ TEST_F(SparseApplyProximalAdagradCpuKernelTest, dense_test) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); - CreateWorkspaceAddress(new_grad, new_indices); + std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices); sparse_proximal_adagrad_->Launch(inputs_, workspace_, outputs_); for 
(size_t i = 0; i < 3 * 3 * 3; ++i) { EXPECT_TRUE(std::fabs(var_[i] - 0.9929289) < 1e-6); @@ -108,7 +113,9 @@ TEST_F(SparseApplyProximalAdagradCpuKernelTest, sparse_test1) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); - CreateWorkspaceAddress(new_grad, new_indices); + std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices); sparse_proximal_adagrad_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3; ++i) { EXPECT_TRUE(std::fabs(var_[i] - 0.9929289) < 1e-6); @@ -135,7 +142,9 @@ TEST_F(SparseApplyProximalAdagradCpuKernelTest, sparse_test2) { CreateInputAddress(indices); std::vector new_grad(3 * 3 * 3); std::vector new_indices(3); - CreateWorkspaceAddress(new_grad, new_indices); + std::vector tmp_grad(3 * 3 * 3); + std::vector tmp_indices(3); + CreateWorkspaceAddress(new_grad, new_indices, tmp_grad, tmp_indices); sparse_proximal_adagrad_->Launch(inputs_, workspace_, outputs_); for (size_t i = 0; i < 3 * 3; ++i) { EXPECT_EQ(var_[i], 1.0); diff --git a/tests/ut/cpp/mindrecord/ut_common.h b/tests/ut/cpp/mindrecord/ut_common.h index 8b244bf87ae..ee943ab88e0 100644 --- a/tests/ut/cpp/mindrecord/ut_common.h +++ b/tests/ut/cpp/mindrecord/ut_common.h @@ -25,10 +25,10 @@ #include "common/utils.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "mindrecord/include/shard_index.h" -#include "mindrecord/include/shard_header.h" -#include "mindrecord/include/shard_index_generator.h" -#include "mindrecord/include/shard_writer.h" +#include "minddata/mindrecord/include/shard_index.h" +#include "minddata/mindrecord/include/shard_header.h" +#include "minddata/mindrecord/include/shard_index_generator.h" +#include "minddata/mindrecord/include/shard_writer.h" using json = nlohmann::json; using std::ifstream; using std::pair; diff --git a/tests/ut/cpp/mindrecord/ut_shard.cc b/tests/ut/cpp/mindrecord/ut_shard.cc index 
b8c229e82f7..11492e9f285 100644 --- a/tests/ut/cpp/mindrecord/ut_shard.cc +++ b/tests/ut/cpp/mindrecord/ut_shard.cc @@ -23,10 +23,10 @@ #include "configuration.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "mindrecord/include/shard_error.h" -#include "mindrecord/include/shard_index.h" -#include "mindrecord/include/shard_header.h" -#include "mindrecord/include/shard_statistics.h" +#include "minddata/mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_index.h" +#include "minddata/mindrecord/include/shard_header.h" +#include "minddata/mindrecord/include/shard_statistics.h" #include "securec.h" #include "ut_common.h" diff --git a/tests/ut/cpp/mindrecord/ut_shard_header_test.cc b/tests/ut/cpp/mindrecord/ut_shard_header_test.cc index cea71c34b7c..2ff3d1655d3 100644 --- a/tests/ut/cpp/mindrecord/ut_shard_header_test.cc +++ b/tests/ut/cpp/mindrecord/ut_shard_header_test.cc @@ -29,13 +29,13 @@ #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "mindrecord/include/shard_error.h" -#include "mindrecord/include/shard_reader.h" -#include "mindrecord/include/shard_writer.h" -#include "mindrecord/include/shard_index.h" -#include "mindrecord/include/shard_header.h" -#include "mindrecord/include/shard_schema.h" -#include "mindrecord/include/shard_statistics.h" +#include "minddata/mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_reader.h" +#include "minddata/mindrecord/include/shard_writer.h" +#include "minddata/mindrecord/include/shard_index.h" +#include "minddata/mindrecord/include/shard_header.h" +#include "minddata/mindrecord/include/shard_schema.h" +#include "minddata/mindrecord/include/shard_statistics.h" #include "securec.h" #include "ut_common.h" diff --git a/tests/ut/cpp/mindrecord/ut_shard_index_generator_test.cc b/tests/ut/cpp/mindrecord/ut_shard_index_generator_test.cc index 140fff4166c..8e264aafa0a 100644 --- a/tests/ut/cpp/mindrecord/ut_shard_index_generator_test.cc +++ 
b/tests/ut/cpp/mindrecord/ut_shard_index_generator_test.cc @@ -29,10 +29,10 @@ #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "mindrecord/include/shard_error.h" -#include "mindrecord/include/shard_index_generator.h" -#include "mindrecord/include/shard_index.h" -#include "mindrecord/include/shard_statistics.h" +#include "minddata/mindrecord/include/shard_error.h" +#include "minddata/mindrecord/include/shard_index_generator.h" +#include "minddata/mindrecord/include/shard_index.h" +#include "minddata/mindrecord/include/shard_statistics.h" #include "securec.h" #include "ut_common.h" diff --git a/tests/ut/cpp/mindrecord/ut_shard_operator_test.cc b/tests/ut/cpp/mindrecord/ut_shard_operator_test.cc index 7fe60c3bfa6..4501ea0800d 100644 --- a/tests/ut/cpp/mindrecord/ut_shard_operator_test.cc +++ b/tests/ut/cpp/mindrecord/ut_shard_operator_test.cc @@ -24,11 +24,11 @@ #include "common/utils.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "mindrecord/include/shard_category.h" -#include "mindrecord/include/shard_pk_sample.h" -#include "mindrecord/include/shard_reader.h" -#include "mindrecord/include/shard_sample.h" -#include "mindrecord/include/shard_shuffle.h" +#include "minddata/mindrecord/include/shard_category.h" +#include "minddata/mindrecord/include/shard_pk_sample.h" +#include "minddata/mindrecord/include/shard_reader.h" +#include "minddata/mindrecord/include/shard_sample.h" +#include "minddata/mindrecord/include/shard_shuffle.h" #include "ut_common.h" using mindspore::LogStream; diff --git a/tests/ut/cpp/mindrecord/ut_shard_page_test.cc b/tests/ut/cpp/mindrecord/ut_shard_page_test.cc index dabd3d819fe..a7e444c80f5 100644 --- a/tests/ut/cpp/mindrecord/ut_shard_page_test.cc +++ b/tests/ut/cpp/mindrecord/ut_shard_page_test.cc @@ -21,7 +21,7 @@ #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "mindrecord/include/shard_page.h" +#include "minddata/mindrecord/include/shard_page.h" #include "ut_common.h" using json = 
nlohmann::json; diff --git a/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc b/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc index c532fe28b8c..8b5eb2cf690 100644 --- a/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc +++ b/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc @@ -24,8 +24,8 @@ #include "common/utils.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "mindrecord/include/shard_reader.h" -#include "mindrecord/include/shard_sample.h" +#include "minddata/mindrecord/include/shard_reader.h" +#include "minddata/mindrecord/include/shard_sample.h" #include "ut_common.h" using mindspore::LogStream; diff --git a/tests/ut/cpp/mindrecord/ut_shard_schema_test.cc b/tests/ut/cpp/mindrecord/ut_shard_schema_test.cc index 8d9654a5ef4..6863a25791d 100644 --- a/tests/ut/cpp/mindrecord/ut_shard_schema_test.cc +++ b/tests/ut/cpp/mindrecord/ut_shard_schema_test.cc @@ -29,9 +29,9 @@ #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "mindrecord/include/shard_page.h" -#include "mindrecord/include/shard_schema.h" -#include "mindrecord/include/shard_statistics.h" +#include "minddata/mindrecord/include/shard_page.h" +#include "minddata/mindrecord/include/shard_schema.h" +#include "minddata/mindrecord/include/shard_statistics.h" #include "securec.h" #include "ut_common.h" diff --git a/tests/ut/cpp/mindrecord/ut_shard_segment_test.cc b/tests/ut/cpp/mindrecord/ut_shard_segment_test.cc index 3fa68123520..6b99e44d890 100644 --- a/tests/ut/cpp/mindrecord/ut_shard_segment_test.cc +++ b/tests/ut/cpp/mindrecord/ut_shard_segment_test.cc @@ -30,7 +30,7 @@ #include "common/utils.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "mindrecord/include/shard_segment.h" +#include "minddata/mindrecord/include/shard_segment.h" #include "ut_common.h" using mindspore::LogStream; diff --git a/tests/ut/cpp/mindrecord/ut_shard_writer_test.cc b/tests/ut/cpp/mindrecord/ut_shard_writer_test.cc index 159efbf2f80..046b4f93d5c 100644 --- 
a/tests/ut/cpp/mindrecord/ut_shard_writer_test.cc +++ b/tests/ut/cpp/mindrecord/ut_shard_writer_test.cc @@ -24,9 +24,9 @@ #include "common/utils.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" -#include "mindrecord/include/shard_reader.h" -#include "mindrecord/include/shard_writer.h" -#include "mindrecord/include/shard_index_generator.h" +#include "minddata/mindrecord/include/shard_reader.h" +#include "minddata/mindrecord/include/shard_writer.h" +#include "minddata/mindrecord/include/shard_index_generator.h" #include "securec.h" #include "ut_common.h" diff --git a/tests/ut/cpp/operator/cc_implementations_test.cc b/tests/ut/cpp/operator/cc_implementations_test.cc index bac885db887..4bc5aea964b 100644 --- a/tests/ut/cpp/operator/cc_implementations_test.cc +++ b/tests/ut/cpp/operator/cc_implementations_test.cc @@ -18,7 +18,7 @@ #include #include "common/common_test.h" -#include "operator/cc_implementations.h" +#include "frontend/operator/cc_implementations.h" namespace mindspore { namespace prim { diff --git a/tests/ut/cpp/operator/composite_test.cc b/tests/ut/cpp/operator/composite_test.cc index 8ca318300a8..a2108998bc6 100644 --- a/tests/ut/cpp/operator/composite_test.cc +++ b/tests/ut/cpp/operator/composite_test.cc @@ -18,10 +18,10 @@ #include "common/common_test.h" #include "ir/anf.h" #include "ir/value.h" -#include "operator/composite/composite.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/prim.h" -#include "pipeline/static_analysis/abstract_function.h" +#include "frontend/operator/composite/composite.h" +#include "frontend/operator/ops.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "pipeline/jit/static_analysis/abstract_function.h" #include "debug/trace.h" namespace mindspore { @@ -127,11 +127,17 @@ TEST_F(TestComposite, test_TupleSlice_arg_one_number) { try { trace::ClearTraceStack(); engine_->Run(tupleSliceGraphPtr, args_spec_list); - FAIL() << "Excepted exception :Args type is wrong"; + FAIL() << "Excepted 
exception: Args type is wrong"; } catch (pybind11::type_error const &err) { ASSERT_TRUE(true); + } catch (std::runtime_error const &err) { + if (std::strstr(err.what(), "TypeError") != nullptr) { + ASSERT_TRUE(true); + } else { + FAIL() << "Excepted exception: Args type is wrong, message: " << err.what(); + } } catch (...) { - FAIL() << "Excepted exception :Args type is wrong"; + FAIL() << "Excepted exception: Args type is wrong"; } } diff --git a/tests/ut/cpp/operator/grad_implementations_test.cc b/tests/ut/cpp/operator/grad_implementations_test.cc index e9035e63b6a..f55553ab721 100644 --- a/tests/ut/cpp/operator/grad_implementations_test.cc +++ b/tests/ut/cpp/operator/grad_implementations_test.cc @@ -20,7 +20,7 @@ #include "ir/value.h" #include "ir/manager.h" #include "common/common_test.h" -#include "optimizer/ad/dfunctor.h" +#include "frontend/optimizer/ad/dfunctor.h" #include "debug/draw.h" #include "common/py_func_graph_fetcher.h" diff --git a/tests/ut/cpp/operator/ops_test.cc b/tests/ut/cpp/operator/ops_test.cc index 1d1389b54a7..789b1cab252 100644 --- a/tests/ut/cpp/operator/ops_test.cc +++ b/tests/ut/cpp/operator/ops_test.cc @@ -19,8 +19,8 @@ #include "common/common_test.h" #include "ir/value.h" -#include "ir/primitive.h" -#include "operator/ops.h" +#include "ir/primitive_py.h" +#include "frontend/operator/ops.h" #include "./common.h" namespace mindspore { diff --git a/tests/ut/cpp/operator/prim2func_test.cc b/tests/ut/cpp/operator/prim2func_test.cc index 8f7c73a064e..3952128b527 100644 --- a/tests/ut/cpp/operator/prim2func_test.cc +++ b/tests/ut/cpp/operator/prim2func_test.cc @@ -21,7 +21,7 @@ #include "ir/anf.h" #include "ir/dtype.h" -#include "operator/prim_to_function.h" +#include "frontend/operator/prim_to_function.h" namespace mindspore { namespace prim { diff --git a/tests/ut/cpp/optimizer/ad/ad_test.cc b/tests/ut/cpp/optimizer/ad/ad_test.cc index 34612b54744..3f861d36049 100644 --- a/tests/ut/cpp/optimizer/ad/ad_test.cc +++ 
b/tests/ut/cpp/optimizer/ad/ad_test.cc @@ -16,7 +16,7 @@ #include #include -#include "optimizer/ad/grad.h" +#include "frontend/optimizer/ad/grad.h" #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" #include "ir/manager.h" @@ -24,10 +24,10 @@ #include "ir/func_graph_cloner.h" #include "utils/log_adapter.h" #include "utils/graph_utils.h" -#include "pipeline/resource.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/resource.h" +#include "pipeline/jit/parse/parse.h" #include "debug/draw.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace ad { diff --git a/tests/ut/cpp/optimizer/cconv_test.cc b/tests/ut/cpp/optimizer/cconv_test.cc index 8bd6957e85f..c0044090580 100644 --- a/tests/ut/cpp/optimizer/cconv_test.cc +++ b/tests/ut/cpp/optimizer/cconv_test.cc @@ -20,7 +20,7 @@ #include "ir/func_graph_cloner.h" #include "utils/log_adapter.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "debug/draw.h" namespace mindspore { diff --git a/tests/ut/cpp/optimizer/clean_test.cc b/tests/ut/cpp/optimizer/clean_test.cc index c4f393c233c..82bec1b5a8a 100644 --- a/tests/ut/cpp/optimizer/clean_test.cc +++ b/tests/ut/cpp/optimizer/clean_test.cc @@ -19,9 +19,9 @@ #include "common/py_func_graph_fetcher.h" #include "utils/log_adapter.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "debug/draw.h" -#include "optimizer/clean.h" +#include "frontend/optimizer/clean.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/optimizer/lib_test.cc b/tests/ut/cpp/optimizer/lib_test.cc index bc8561f1711..751b301283c 100644 --- a/tests/ut/cpp/optimizer/lib_test.cc +++ b/tests/ut/cpp/optimizer/lib_test.cc @@ -25,11 +25,11 @@ #include "ir/manager.h" #include "ir/value.h" #include "ir/visitor.h" -#include "operator/ops.h" -#include "optimizer/irpass.h" -#include "pipeline/resource.h" +#include "frontend/operator/ops.h" +#include 
"frontend/optimizer/irpass.h" +#include "pipeline/jit/resource.h" #include "debug/draw.h" -#include "pipeline/parse/data_converter.h" +#include "pipeline/jit/parse/data_converter.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/optimizer/opt_test.cc b/tests/ut/cpp/optimizer/opt_test.cc index 2428d0dddb3..c329adc4a5a 100644 --- a/tests/ut/cpp/optimizer/opt_test.cc +++ b/tests/ut/cpp/optimizer/opt_test.cc @@ -22,13 +22,13 @@ #include "ir/anf.h" #include "ir/visitor.h" #include "ir/func_graph_cloner.h" -#include "optimizer/opt.h" -#include "optimizer/irpass.h" -#include "optimizer/irpass/arithmetic_simplify.h" +#include "frontend/optimizer/opt.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/irpass/arithmetic_simplify.h" #include "debug/draw.h" -#include "operator/ops.h" -#include "optimizer/cse.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/cse.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/optimizer/optimizer_test.cc b/tests/ut/cpp/optimizer/optimizer_test.cc index ca7c589d47d..c5c99531e4c 100644 --- a/tests/ut/cpp/optimizer/optimizer_test.cc +++ b/tests/ut/cpp/optimizer/optimizer_test.cc @@ -20,10 +20,10 @@ #include "common/py_func_graph_fetcher.h" #include "ir/anf.h" -#include "operator/ops.h" -#include "optimizer/cse.h" -#include "optimizer/optimizer.h" -#include "optimizer/irpass.h" +#include "frontend/operator/ops.h" +#include "frontend/optimizer/cse.h" +#include "frontend/optimizer/optimizer.h" +#include "frontend/optimizer/irpass.h" #include "debug/draw.h" namespace mindspore { diff --git a/tests/ut/cpp/parallel/auto_parallel/dp_algo_test.cc b/tests/ut/cpp/parallel/auto_parallel/dp_algo_test.cc index 04629936729..a500afc8591 100644 --- a/tests/ut/cpp/parallel/auto_parallel/dp_algo_test.cc +++ b/tests/ut/cpp/parallel/auto_parallel/dp_algo_test.cc @@ -15,12 +15,12 @@ */ #include "common/common_test.h" -#include "parallel/device_manager.h" -#include 
"parallel/auto_parallel/graph_costmodel.h" -#include "parallel/ops_info/matmul_info.h" -#include "parallel/ops_info/activation_info.h" -#include "parallel/ops_info/tmp_identity_info.h" -#include "parallel/auto_parallel/dp_algo_costmodel.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/ops_info/matmul_info.h" +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/ops_info/tmp_identity_info.h" +#include "frontend/parallel/auto_parallel/dp_algo_costmodel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/auto_parallel/edge_costmodel_test.cc b/tests/ut/cpp/parallel/auto_parallel/edge_costmodel_test.cc index 291539c27d9..190a189a2df 100644 --- a/tests/ut/cpp/parallel/auto_parallel/edge_costmodel_test.cc +++ b/tests/ut/cpp/parallel/auto_parallel/edge_costmodel_test.cc @@ -16,9 +16,9 @@ #include "common/common_test.h" #include "ir/dtype/number.h" -#include "parallel/device_manager.h" -#include "parallel/auto_parallel/edge_costmodel.h" -#include "parallel/ops_info/matmul_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/auto_parallel/edge_costmodel.h" +#include "frontend/parallel/ops_info/matmul_info.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/auto_parallel/graph_costmodel_test.cc b/tests/ut/cpp/parallel/auto_parallel/graph_costmodel_test.cc index 78d05c7235e..7d63f03179e 100644 --- a/tests/ut/cpp/parallel/auto_parallel/graph_costmodel_test.cc +++ b/tests/ut/cpp/parallel/auto_parallel/graph_costmodel_test.cc @@ -15,9 +15,9 @@ */ #include "common/common_test.h" -#include "parallel/device_manager.h" -#include "parallel/auto_parallel/graph_costmodel.h" -#include "parallel/ops_info/matmul_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/ops_info/matmul_info.h" 
namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/auto_parallel/operator_costmodel_test.cc b/tests/ut/cpp/parallel/auto_parallel/operator_costmodel_test.cc index 919c5b43eca..b9b6bb67d9a 100644 --- a/tests/ut/cpp/parallel/auto_parallel/operator_costmodel_test.cc +++ b/tests/ut/cpp/parallel/auto_parallel/operator_costmodel_test.cc @@ -15,10 +15,10 @@ */ #include -#include "parallel/tensor_layout/tensor_layout.h" -#include "parallel/tensor_layout/tensor_info.h" -#include "parallel/auto_parallel/operator_costmodel.h" -#include "parallel/device_manager.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/tensor_layout/tensor_info.h" +#include "frontend/parallel/auto_parallel/operator_costmodel.h" +#include "frontend/parallel/device_manager.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/auto_parallel/rec_partition_test.cc b/tests/ut/cpp/parallel/auto_parallel/rec_partition_test.cc index 1eb65b468fb..7942fa2a100 100644 --- a/tests/ut/cpp/parallel/auto_parallel/rec_partition_test.cc +++ b/tests/ut/cpp/parallel/auto_parallel/rec_partition_test.cc @@ -15,9 +15,9 @@ */ #include "common/common_test.h" -#include "parallel/auto_parallel/rec_core/rec_tensor.h" -#include "parallel/auto_parallel/rec_core/rec_graph.h" -#include "parallel/auto_parallel/rec_core/rec_partition.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_tensor.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_graph.h" +#include "frontend/parallel/auto_parallel/rec_core/rec_partition.h" #include #include "ir/value.h" diff --git a/tests/ut/cpp/parallel/device_manager_test.cc b/tests/ut/cpp/parallel/device_manager_test.cc index 056896f5144..0c048d647ba 100644 --- a/tests/ut/cpp/parallel/device_manager_test.cc +++ b/tests/ut/cpp/parallel/device_manager_test.cc @@ -15,9 +15,9 @@ */ #include #include "common/common_test.h" -#include "parallel/device.h" -#include "parallel/device_manager.h" -#include 
"parallel/group_manager.h" +#include "frontend/parallel/device.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/group_manager.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/device_matrix_test.cc b/tests/ut/cpp/parallel/device_matrix_test.cc index 877a211df8c..57a438e76e9 100644 --- a/tests/ut/cpp/parallel/device_matrix_test.cc +++ b/tests/ut/cpp/parallel/device_matrix_test.cc @@ -16,7 +16,7 @@ #include #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" -#include "parallel/device_matrix.h" +#include "frontend/parallel/device_matrix.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/group_manager_test.cc b/tests/ut/cpp/parallel/group_manager_test.cc index e3d2b3a364c..fa4abfcb7eb 100644 --- a/tests/ut/cpp/parallel/group_manager_test.cc +++ b/tests/ut/cpp/parallel/group_manager_test.cc @@ -14,10 +14,10 @@ * limitations under the License. */ #include -#include "parallel/device_manager.h" +#include "frontend/parallel/device_manager.h" #include "common/common_test.h" -#include "parallel/device.h" -#include "parallel/group_manager.h" +#include "frontend/parallel/device.h" +#include "frontend/parallel/group_manager.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/activation_info_test.cc b/tests/ut/cpp/parallel/ops_info/activation_info_test.cc index a9fe9b4c489..5f09de9e487 100644 --- a/tests/ut/cpp/parallel/ops_info/activation_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/activation_info_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/activation_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace 
mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/activation_test.cc b/tests/ut/cpp/parallel/ops_info/activation_test.cc index 9af72037991..9d129b7a186 100644 --- a/tests/ut/cpp/parallel/ops_info/activation_test.cc +++ b/tests/ut/cpp/parallel/ops_info/activation_test.cc @@ -18,9 +18,9 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/activation_info.h" -#include "parallel/device_manager.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/device_manager.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/gelu_info_test.cc b/tests/ut/cpp/parallel/ops_info/gelu_info_test.cc index e54d1f24235..e49ed4e79d0 100644 --- a/tests/ut/cpp/parallel/ops_info/gelu_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/gelu_info_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/activation_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/generate_strategy_test.cc b/tests/ut/cpp/parallel/ops_info/generate_strategy_test.cc index 947ad60ccad..125723868ae 100644 --- a/tests/ut/cpp/parallel/ops_info/generate_strategy_test.cc +++ b/tests/ut/cpp/parallel/ops_info/generate_strategy_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/arithmetic_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include 
"frontend/parallel/ops_info/arithmetic_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/get_next_info_test.cc b/tests/ut/cpp/parallel/ops_info/get_next_info_test.cc index 503edf2edad..029e0f2dc69 100644 --- a/tests/ut/cpp/parallel/ops_info/get_next_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/get_next_info_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/get_next_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/get_next_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/l2_normalize_info_test.cc b/tests/ut/cpp/parallel/ops_info/l2_normalize_info_test.cc index b59481e1f60..7037a856996 100644 --- a/tests/ut/cpp/parallel/ops_info/l2_normalize_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/l2_normalize_info_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/l2_normalize_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/l2_normalize_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/log_softmax_info_test.cc b/tests/ut/cpp/parallel/ops_info/log_softmax_info_test.cc index cf5a4239a23..8de5c07226c 100644 --- a/tests/ut/cpp/parallel/ops_info/log_softmax_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/log_softmax_info_test.cc @@ -18,10 +18,10 @@ #include #include 
#include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/activation_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc b/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc index f710f51265f..2d5676f211f 100644 --- a/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc @@ -18,11 +18,11 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/matmul_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" -#include "parallel/auto_parallel/graph_costmodel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/matmul_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" +#include "frontend/parallel/auto_parallel/graph_costmodel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/onehot_info_test.cc b/tests/ut/cpp/parallel/ops_info/onehot_info_test.cc index 07d150a2944..074e4582f06 100644 --- a/tests/ut/cpp/parallel/ops_info/onehot_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/onehot_info_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/onehot_info.h" -#include "parallel/device_manager.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/onehot_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { 
namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/onehot_info_test_axis_0.cc b/tests/ut/cpp/parallel/ops_info/onehot_info_test_axis_0.cc index c89bf97fb35..769d5bec455 100644 --- a/tests/ut/cpp/parallel/ops_info/onehot_info_test_axis_0.cc +++ b/tests/ut/cpp/parallel/ops_info/onehot_info_test_axis_0.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/onehot_info.h" -#include "parallel/device_manager.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/onehot_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/pow_info_test.cc b/tests/ut/cpp/parallel/ops_info/pow_info_test.cc index 7b37a90fd83..f582640db8a 100644 --- a/tests/ut/cpp/parallel/ops_info/pow_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/pow_info_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/arithmetic_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/arithmetic_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/prelu_test.cc b/tests/ut/cpp/parallel/ops_info/prelu_test.cc index d6db1b84607..1d4cf5eff0e 100644 --- a/tests/ut/cpp/parallel/ops_info/prelu_test.cc +++ b/tests/ut/cpp/parallel/ops_info/prelu_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/prelu_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" 
+#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/prelu_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/reduce_method_test.cc b/tests/ut/cpp/parallel/ops_info/reduce_method_test.cc index a1fe46ca332..64ba6af70b5 100644 --- a/tests/ut/cpp/parallel/ops_info/reduce_method_test.cc +++ b/tests/ut/cpp/parallel/ops_info/reduce_method_test.cc @@ -18,11 +18,11 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/reduce_method_info.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/reduce_method_info.h" #include "common/py_func_graph_fetcher.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/reshape_test.cc b/tests/ut/cpp/parallel/ops_info/reshape_test.cc index fb60c6d250a..8cc8390e9a4 100644 --- a/tests/ut/cpp/parallel/ops_info/reshape_test.cc +++ b/tests/ut/cpp/parallel/ops_info/reshape_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/reshape_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/reshape_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/softmax_entropy_loss_info_test.cc b/tests/ut/cpp/parallel/ops_info/softmax_entropy_loss_info_test.cc index 03634b9a6fc..d370c168c9f 100644 --- a/tests/ut/cpp/parallel/ops_info/softmax_entropy_loss_info_test.cc +++ 
b/tests/ut/cpp/parallel/ops_info/softmax_entropy_loss_info_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/loss_info.h" -#include "parallel/device_manager.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/loss_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/softmax_info_test.cc b/tests/ut/cpp/parallel/ops_info/softmax_info_test.cc index bba6e89626e..9c4205672b4 100644 --- a/tests/ut/cpp/parallel/ops_info/softmax_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/softmax_info_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/activation_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/tanh_info_test.cc b/tests/ut/cpp/parallel/ops_info/tanh_info_test.cc index a892c5c84a9..2be6c5bf7f4 100644 --- a/tests/ut/cpp/parallel/ops_info/tanh_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/tanh_info_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/activation_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/activation_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace 
parallel { diff --git a/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc b/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc index 42d292c605d..b523652fcbb 100644 --- a/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/tensor_add_info_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/arithmetic_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/arithmetic_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc b/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc index eabac51e173..461a27d4ed6 100644 --- a/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc +++ b/tests/ut/cpp/parallel/ops_info/tmpidentity_test.cc @@ -15,10 +15,10 @@ */ #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/device_manager.h" -#include "parallel/ops_info/operator_info.h" -#include "parallel/ops_info/tmp_identity_info.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/parallel/ops_info/tmp_identity_info.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/ops_info/transpose_test.cc b/tests/ut/cpp/parallel/ops_info/transpose_test.cc index 991ec478205..fe5cbb01b3d 100644 --- a/tests/ut/cpp/parallel/ops_info/transpose_test.cc +++ b/tests/ut/cpp/parallel/ops_info/transpose_test.cc @@ -18,10 +18,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/transpose_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include 
"frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/transpose_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/step_auto_parallel_test.cc b/tests/ut/cpp/parallel/step_auto_parallel_test.cc index a1474ca2447..6cf7ec66c64 100644 --- a/tests/ut/cpp/parallel/step_auto_parallel_test.cc +++ b/tests/ut/cpp/parallel/step_auto_parallel_test.cc @@ -14,12 +14,12 @@ * limitations under the License. */ #include "common/common_test.h" -#include "parallel/step_parallel.h" -#include "parallel/step_auto_parallel.h" -#include "parallel/auto_parallel/edge_costmodel.h" -#include "parallel/ops_info/operator_info.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/static_analysis.h" +#include "frontend/parallel/step_parallel.h" +#include "frontend/parallel/step_auto_parallel.h" +#include "frontend/parallel/auto_parallel/edge_costmodel.h" +#include "frontend/parallel/ops_info/operator_info.h" +#include "frontend/operator/ops.h" +#include "pipeline/jit/static_analysis/static_analysis.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/step_parallel_test.cc b/tests/ut/cpp/parallel/step_parallel_test.cc index d8f8681a349..5657db87906 100644 --- a/tests/ut/cpp/parallel/step_parallel_test.cc +++ b/tests/ut/cpp/parallel/step_parallel_test.cc @@ -14,12 +14,12 @@ * limitations under the License. 
*/ #include "common/common_test.h" -#include "parallel/step_parallel.h" -#include "parallel/graph_util/generate_graph.h" +#include "frontend/parallel/step_parallel.h" +#include "frontend/parallel/graph_util/generate_graph.h" #include "common/py_func_graph_fetcher.h" #include "debug/draw.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/static_analysis.h" +#include "frontend/operator/ops.h" +#include "pipeline/jit/static_analysis/static_analysis.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/strategy_test.cc b/tests/ut/cpp/parallel/strategy_test.cc index 9a2f92f018c..c13b71944e8 100644 --- a/tests/ut/cpp/parallel/strategy_test.cc +++ b/tests/ut/cpp/parallel/strategy_test.cc @@ -17,7 +17,7 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" +#include "frontend/parallel/strategy.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/tensor_layout/construct_operator_test.cc b/tests/ut/cpp/parallel/tensor_layout/construct_operator_test.cc index 2ba8cc9dfc1..b80f1990351 100644 --- a/tests/ut/cpp/parallel/tensor_layout/construct_operator_test.cc +++ b/tests/ut/cpp/parallel/tensor_layout/construct_operator_test.cc @@ -17,10 +17,10 @@ #include #include "common/common_test.h" #include "ir/value.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/matmul_info.h" -#include "parallel/device_manager.h" -#include "parallel/tensor_layout/construct_operator.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/matmul_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/tensor_layout/construct_operator.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/tensor_layout/redistribution_layout_transfer_test.cc b/tests/ut/cpp/parallel/tensor_layout/redistribution_layout_transfer_test.cc index 5291e2f48de..4ddc130a452 100644 --- 
a/tests/ut/cpp/parallel/tensor_layout/redistribution_layout_transfer_test.cc +++ b/tests/ut/cpp/parallel/tensor_layout/redistribution_layout_transfer_test.cc @@ -17,8 +17,8 @@ #include #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" -#include "parallel/tensor_layout/tensor_layout.h" -#include "parallel/tensor_layout/redistribution_layout_transfer.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/tensor_layout/redistribution_layout_transfer.h" #include "util_layout_gen_test.h" namespace mindspore { diff --git a/tests/ut/cpp/parallel/tensor_layout/redistribution_operator_infer_test.cc b/tests/ut/cpp/parallel/tensor_layout/redistribution_operator_infer_test.cc index 1b1dd4af043..f6caad2f9d7 100644 --- a/tests/ut/cpp/parallel/tensor_layout/redistribution_operator_infer_test.cc +++ b/tests/ut/cpp/parallel/tensor_layout/redistribution_operator_infer_test.cc @@ -16,8 +16,8 @@ #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" -#include "parallel/tensor_layout/redistribution_operator_infer.h" -#include "parallel/device_manager.h" +#include "frontend/parallel/tensor_layout/redistribution_operator_infer.h" +#include "frontend/parallel/device_manager.h" #include "util_layout_gen_test.h" namespace mindspore { diff --git a/tests/ut/cpp/parallel/tensor_layout/reshape_layout_transfer_test.cc b/tests/ut/cpp/parallel/tensor_layout/reshape_layout_transfer_test.cc index 9d6152721e6..11f471ea336 100644 --- a/tests/ut/cpp/parallel/tensor_layout/reshape_layout_transfer_test.cc +++ b/tests/ut/cpp/parallel/tensor_layout/reshape_layout_transfer_test.cc @@ -17,8 +17,8 @@ #include #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" -#include "parallel/tensor_layout/tensor_layout.h" -#include "parallel/tensor_layout/reshape_layout_transfer.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/tensor_layout/reshape_layout_transfer.h" #include 
"util_layout_gen_test.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/parallel/tensor_layout/shape_util_test.cc b/tests/ut/cpp/parallel/tensor_layout/shape_util_test.cc index b5e2ea3e5bd..824ab876cdf 100644 --- a/tests/ut/cpp/parallel/tensor_layout/shape_util_test.cc +++ b/tests/ut/cpp/parallel/tensor_layout/shape_util_test.cc @@ -16,7 +16,7 @@ #include #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" -#include "parallel/tensor_layout/shape_util.h" +#include "frontend/parallel/tensor_layout/shape_util.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/tensor_layout/tensor_layout_test.cc b/tests/ut/cpp/parallel/tensor_layout/tensor_layout_test.cc index bae05d650a1..15fb16f088e 100644 --- a/tests/ut/cpp/parallel/tensor_layout/tensor_layout_test.cc +++ b/tests/ut/cpp/parallel/tensor_layout/tensor_layout_test.cc @@ -17,7 +17,7 @@ #include #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" -#include "parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/tensor_layout/tensor_redistribution_test.cc b/tests/ut/cpp/parallel/tensor_layout/tensor_redistribution_test.cc index 572763faa38..40a4017c4bf 100644 --- a/tests/ut/cpp/parallel/tensor_layout/tensor_redistribution_test.cc +++ b/tests/ut/cpp/parallel/tensor_layout/tensor_redistribution_test.cc @@ -17,7 +17,7 @@ #include #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" -#include "parallel/tensor_layout/tensor_redistribution.h" +#include "frontend/parallel/tensor_layout/tensor_redistribution.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/tensor_layout/util_layout_gen_test.cc b/tests/ut/cpp/parallel/tensor_layout/util_layout_gen_test.cc index 6f5c1e49ed1..330b571ae7e 100644 --- a/tests/ut/cpp/parallel/tensor_layout/util_layout_gen_test.cc +++ 
b/tests/ut/cpp/parallel/tensor_layout/util_layout_gen_test.cc @@ -21,7 +21,7 @@ #include #include #include -#include "parallel/tensor_layout/shape_util.h" +#include "frontend/parallel/tensor_layout/shape_util.h" #include "common/common_test.h" using std::pow; diff --git a/tests/ut/cpp/parallel/tensor_layout/util_layout_gen_test.h b/tests/ut/cpp/parallel/tensor_layout/util_layout_gen_test.h index a359cadbea6..c16a1fc6d4e 100644 --- a/tests/ut/cpp/parallel/tensor_layout/util_layout_gen_test.h +++ b/tests/ut/cpp/parallel/tensor_layout/util_layout_gen_test.h @@ -20,7 +20,7 @@ #include #include -#include "parallel/tensor_layout/tensor_layout.h" +#include "frontend/parallel/tensor_layout/tensor_layout.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/parallel/virtual_dataset_test.cc b/tests/ut/cpp/parallel/virtual_dataset_test.cc index 1d3ff081c7c..4cafdebc172 100644 --- a/tests/ut/cpp/parallel/virtual_dataset_test.cc +++ b/tests/ut/cpp/parallel/virtual_dataset_test.cc @@ -17,10 +17,10 @@ #include #include #include "common/common_test.h" -#include "parallel/strategy.h" -#include "parallel/ops_info/virtual_dataset_info.h" -#include "parallel/device_manager.h" -#include "parallel/step_parallel.h" +#include "frontend/parallel/strategy.h" +#include "frontend/parallel/ops_info/virtual_dataset_info.h" +#include "frontend/parallel/device_manager.h" +#include "frontend/parallel/step_parallel.h" namespace mindspore { namespace parallel { diff --git a/tests/ut/cpp/pipeline/parse/parser_abnormal_test.cc b/tests/ut/cpp/pipeline/parse/parser_abnormal_test.cc index 3c97cfb2038..2d21b591ea3 100644 --- a/tests/ut/cpp/pipeline/parse/parser_abnormal_test.cc +++ b/tests/ut/cpp/pipeline/parse/parser_abnormal_test.cc @@ -19,7 +19,7 @@ #include "common/py_func_graph_fetcher.h" #include "utils/log_adapter.h" #include "utils/profile.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "debug/draw.h" namespace mindspore { diff --git 
a/tests/ut/cpp/pipeline/parse/parser_class_test.cc b/tests/ut/cpp/pipeline/parse/parser_class_test.cc index dcedc32b1ba..8d9cc8ebc8a 100644 --- a/tests/ut/cpp/pipeline/parse/parser_class_test.cc +++ b/tests/ut/cpp/pipeline/parse/parser_class_test.cc @@ -19,7 +19,7 @@ #include "common/py_func_graph_fetcher.h" #include "utils/log_adapter.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "debug/draw.h" namespace mindspore { diff --git a/tests/ut/cpp/pipeline/parse/parser_integrate_test.cc b/tests/ut/cpp/pipeline/parse/parser_integrate_test.cc index fd8438503fe..1f54298a811 100644 --- a/tests/ut/cpp/pipeline/parse/parser_integrate_test.cc +++ b/tests/ut/cpp/pipeline/parse/parser_integrate_test.cc @@ -18,7 +18,7 @@ #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" #include "utils/log_adapter.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "debug/draw.h" namespace mindspore { diff --git a/tests/ut/cpp/pipeline/parse/parser_primitive_test.cc b/tests/ut/cpp/pipeline/parse/parser_primitive_test.cc index adc09cca327..937ad1fe5eb 100644 --- a/tests/ut/cpp/pipeline/parse/parser_primitive_test.cc +++ b/tests/ut/cpp/pipeline/parse/parser_primitive_test.cc @@ -19,7 +19,7 @@ #include "common/py_func_graph_fetcher.h" #include "utils/log_adapter.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "debug/draw.h" namespace mindspore { diff --git a/tests/ut/cpp/pipeline/parse/parser_test.cc b/tests/ut/cpp/pipeline/parse/parser_test.cc index 4d7731dfd1f..f1d90871102 100644 --- a/tests/ut/cpp/pipeline/parse/parser_test.cc +++ b/tests/ut/cpp/pipeline/parse/parser_test.cc @@ -19,7 +19,7 @@ #include "common/py_func_graph_fetcher.h" #include "utils/log_adapter.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "debug/draw.h" namespace mindspore { diff --git a/tests/ut/cpp/pipeline/parse/resolve_test.cc 
b/tests/ut/cpp/pipeline/parse/resolve_test.cc index 8ade92bb349..5a2d0ebd7fb 100644 --- a/tests/ut/cpp/pipeline/parse/resolve_test.cc +++ b/tests/ut/cpp/pipeline/parse/resolve_test.cc @@ -19,7 +19,7 @@ #include "common/py_func_graph_fetcher.h" #include "utils/log_adapter.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "debug/draw.h" namespace mindspore { diff --git a/tests/ut/cpp/pipeline/resource_test.cc b/tests/ut/cpp/pipeline/resource_test.cc index 09bd2060dc3..b6be393652b 100644 --- a/tests/ut/cpp/pipeline/resource_test.cc +++ b/tests/ut/cpp/pipeline/resource_test.cc @@ -18,9 +18,9 @@ #include "common/common_test.h" #include "utils/log_adapter.h" -#include "pipeline/resource.h" +#include "pipeline/jit/resource.h" #include "ir/primitive.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace pipeline { diff --git a/tests/ut/cpp/pipeline/static_analysis/data_test.cc b/tests/ut/cpp/pipeline/static_analysis/data_test.cc index 61a22bbe5fc..fb9d8b1f7ef 100644 --- a/tests/ut/cpp/pipeline/static_analysis/data_test.cc +++ b/tests/ut/cpp/pipeline/static_analysis/data_test.cc @@ -18,9 +18,9 @@ #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pipeline/static_analysis/prim.h" -#include "operator/ops.h" -#include "pipeline/static_analysis/utils.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "frontend/operator/ops.h" +#include "abstract/utils.h" namespace mindspore { namespace abstract { diff --git a/tests/ut/cpp/pipeline/static_analysis/evaluator_test.cc b/tests/ut/cpp/pipeline/static_analysis/evaluator_test.cc index eebe6c252ba..664f353faae 100644 --- a/tests/ut/cpp/pipeline/static_analysis/evaluator_test.cc +++ b/tests/ut/cpp/pipeline/static_analysis/evaluator_test.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "pipeline/static_analysis/evaluator.h" -#include "pipeline/static_analysis/prim.h" +#include "pipeline/jit/static_analysis/evaluator.h" +#include "pipeline/jit/static_analysis/prim.h" #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" diff --git a/tests/ut/cpp/pipeline/static_analysis/helper.cc b/tests/ut/cpp/pipeline/static_analysis/helper.cc index db697e95e0e..ebf8c233e24 100644 --- a/tests/ut/cpp/pipeline/static_analysis/helper.cc +++ b/tests/ut/cpp/pipeline/static_analysis/helper.cc @@ -16,7 +16,7 @@ #include "pipeline/static_analysis/helper.h" -#include "pipeline/static_analysis/prim.h" +#include "pipeline/jit/static_analysis/prim.h" namespace mindspore { namespace abstract { diff --git a/tests/ut/cpp/pipeline/static_analysis/helper.h b/tests/ut/cpp/pipeline/static_analysis/helper.h index 7ca902a1e94..44c647779e3 100644 --- a/tests/ut/cpp/pipeline/static_analysis/helper.h +++ b/tests/ut/cpp/pipeline/static_analysis/helper.h @@ -17,7 +17,7 @@ #ifndef TESTS_UT_PIPELINE_STATIC_ANALYSIS_HELPER_H_ #define TESTS_UT_PIPELINE_STATIC_ANALYSIS_HELPER_H_ -#include "pipeline/static_analysis/evaluator.h" +#include "pipeline/jit/static_analysis/evaluator.h" namespace mindspore { namespace abstract { diff --git a/tests/ut/cpp/pipeline/static_analysis/prim_test.cc b/tests/ut/cpp/pipeline/static_analysis/prim_test.cc index 04a14a0f293..8ebea4d2122 100644 --- a/tests/ut/cpp/pipeline/static_analysis/prim_test.cc +++ b/tests/ut/cpp/pipeline/static_analysis/prim_test.cc @@ -21,9 +21,9 @@ #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" #include "ir/manager.h" -#include "pipeline/static_analysis/prim.h" +#include "pipeline/jit/static_analysis/prim.h" #include "pipeline/static_analysis/helper.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "debug/draw.h" #include "ir/tensor.h" #include "utils/symbolic.h" diff --git a/tests/ut/cpp/pipeline/static_analysis/specialize_test.cc 
b/tests/ut/cpp/pipeline/static_analysis/specialize_test.cc index 23ea55f8f70..e32a86d9bed 100644 --- a/tests/ut/cpp/pipeline/static_analysis/specialize_test.cc +++ b/tests/ut/cpp/pipeline/static_analysis/specialize_test.cc @@ -20,8 +20,8 @@ #include "common/py_func_graph_fetcher.h" #include "ir/manager.h" -#include "pipeline/static_analysis/prim.h" -#include "pipeline/static_analysis/program_specialize.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "pipeline/jit/static_analysis/program_specialize.h" #include "pipeline/static_analysis/helper.h" #include "utils/log_adapter.h" #include "utils/graph_utils.h" diff --git a/tests/ut/cpp/pipeline/static_analysis/static_analysis_test.cc b/tests/ut/cpp/pipeline/static_analysis/static_analysis_test.cc index 8a58969e120..78d3a7083a6 100644 --- a/tests/ut/cpp/pipeline/static_analysis/static_analysis_test.cc +++ b/tests/ut/cpp/pipeline/static_analysis/static_analysis_test.cc @@ -16,16 +16,16 @@ #include #include -#include "pipeline/static_analysis/prim.h" +#include "pipeline/jit/static_analysis/prim.h" #include "pipeline/static_analysis/helper.h" #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" #include "ir/manager.h" #include "ir/tensor.h" -#include "operator/ops.h" -#include "pipeline/parse/parse.h" -#include "pipeline/parse/data_converter.h" -#include "pipeline/resource.h" +#include "frontend/operator/ops.h" +#include "pipeline/jit/parse/parse.h" +#include "pipeline/jit/parse/data_converter.h" +#include "pipeline/jit/resource.h" #include "debug/draw.h" #include "utils/log_adapter.h" diff --git a/tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc index 483c144930e..58b810a3e1f 100644 --- a/tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc @@ -17,23 +17,23 @@ #include "common/backend_common_test.h" #include 
"common/py_func_graph_fetcher.h" #include "debug/anf_ir_dump.h" -#include "kernel/kernel.h" -#include "device/kernel_info.h" -#include "pre_activate/common/optimizer.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h" -#include "pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.h" +#include "backend/kernel_compiler/kernel.h" +#include "runtime/device/kernel_info.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.h" +#include "backend/optimizer/ascend/buffer_fusion/eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv_single_in_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv_double_in_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h" +#include 
"backend/optimizer/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h" +#include "backend/optimizer/ascend/buffer_fusion/segment_eltwise_fusion_pass.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/enhancer/getnext_memcpy_elimination.cc b/tests/ut/cpp/pre_activate/ascend/enhancer/getnext_memcpy_elimination.cc index e4ab2431b72..ba64c206af6 100644 --- a/tests/ut/cpp/pre_activate/ascend/enhancer/getnext_memcpy_elimination.cc +++ b/tests/ut/cpp/pre_activate/ascend/enhancer/getnext_memcpy_elimination.cc @@ -15,14 +15,14 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "ir/tensor.h" #include "debug/anf_ir_dump.h" #include "utils/utils.h" -#include "kernel/kernel_build_info.h" -#include "pre_activate/common/optimizer.h" -#include "mindspore/ccsrc/pre_activate/ascend/enhancer/getnext_memcpy_elimination.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/optimizer/common/optimizer.h" +#include "mindspore/ccsrc/backend/optimizer/ascend/enhancer/getnext_memcpy_elimination.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc b/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc index 56bf0ae4e08..2be25212e88 100644 --- a/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc +++ b/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc 
@@ -15,16 +15,16 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "session/ascend_session.h" -#include "session/anf_runtime_algorithm.h" -#include "pipeline/resource.h" -#include "operator/ops.h" +#include "backend/session/ascend_session.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "pipeline/jit/resource.h" +#include "frontend/operator/ops.h" #include "ir/manager.h" #include "debug/anf_ir_dump.h" #include "utils/utils.h" -#include "kernel/kernel_build_info.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/enhancer/insert_memcpy_async_for_getnext.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op_test.cc b/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op_test.cc index 22cf70ded3f..103d0f21a4d 100644 --- a/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op_test.cc @@ -15,16 +15,16 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "ir/tensor.h" #include "debug/anf_ir_dump.h" #include "utils/utils.h" -#include "kernel/kernel_build_info.h" -#include "pre_activate/common/optimizer.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/optimizer/common/optimizer.h" #define private public #define protected public -#include "pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op.h" +#include 
"backend/optimizer/ascend/enhancer/insert_memcpy_async_for_hccl_op.h" #undef private #undef protected namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/format_type/check_consistency_test.cc b/tests/ut/cpp/pre_activate/ascend/format_type/check_consistency_test.cc index 72ce73e20f3..89d680f442b 100644 --- a/tests/ut/cpp/pre_activate/ascend/format_type/check_consistency_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/format_type/check_consistency_test.cc @@ -16,18 +16,18 @@ #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" #include "common/backend_common_test.h" -#include "session/ascend_session.h" -#include "session/anf_runtime_algorithm.h" -#include "pipeline/resource.h" -#include "pipeline/action.h" -#include "operator/ops.h" +#include "backend/session/ascend_session.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "pipeline/jit/resource.h" +#include "pipeline/jit/action.h" +#include "frontend/operator/ops.h" #include "ir/tensor.h" #include "ir/manager.h" #include "debug/anf_ir_dump.h" #include "utils/utils.h" -#include "kernel/kernel_build_info.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/format_type/check_consistency.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/format_type/check_consistency.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/format_type/insert_cast_test.cc b/tests/ut/cpp/pre_activate/ascend/format_type/insert_cast_test.cc index 317eace6c6e..2b61a490488 100644 --- a/tests/ut/cpp/pre_activate/ascend/format_type/insert_cast_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/format_type/insert_cast_test.cc @@ -14,17 +14,17 @@ * limitations under the License. 
*/ #include "common/backend_common_test.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "ir/tensor.h" #include "ir/manager.h" #include "debug/anf_ir_dump.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pass_manager.h" -#include "device/kernel_info.h" -#include "pre_activate/ascend/format_type/insert_cast.h" -#include "kernel/kernel_build_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pass_manager.h" +#include "runtime/device/kernel_info.h" +#include "backend/optimizer/ascend/format_type/insert_cast.h" +#include "backend/kernel_compiler/kernel_build_info.h" #include "utils/utils.h" #include "utils/context/ms_context.h" diff --git a/tests/ut/cpp/pre_activate/ascend/format_type/insert_trans_op_test.cc b/tests/ut/cpp/pre_activate/ascend/format_type/insert_trans_op_test.cc index 8c57238e0a0..0a5cf3dd9e4 100644 --- a/tests/ut/cpp/pre_activate/ascend/format_type/insert_trans_op_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/format_type/insert_trans_op_test.cc @@ -14,18 +14,18 @@ * limitations under the License. 
*/ #include "common/backend_common_test.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "debug/anf_ir_dump.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pass_manager.h" -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_info.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pass_manager.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_info.h" #include "utils/context/ms_context.h" #define private public #define protected public -#include "pre_activate/ascend/format_type/insert_trans_op.h" +#include "backend/optimizer/ascend/format_type/insert_trans_op.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/ascend/format_type/merge_cast_to_op_test.cc b/tests/ut/cpp/pre_activate/ascend/format_type/merge_cast_to_op_test.cc index c0017c2debd..69e7fa8b278 100644 --- a/tests/ut/cpp/pre_activate/ascend/format_type/merge_cast_to_op_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/format_type/merge_cast_to_op_test.cc @@ -15,17 +15,17 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "ir/tensor.h" #include "debug/anf_ir_dump.h" #include "utils/utils.h" -#include "kernel/kernel_build_info.h" -#include "pre_activate/common/optimizer.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/optimizer/common/optimizer.h" #define private public #define protected public -#include "pre_activate/ascend/format_type/merge_cast_to_op.h" +#include "backend/optimizer/ascend/format_type/merge_cast_to_op.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/addn_fission_test.cc 
b/tests/ut/cpp/pre_activate/ascend/ir_fission/addn_fission_test.cc index 90174636b1f..8ec2b22a795 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/addn_fission_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/addn_fission_test.cc @@ -18,7 +18,7 @@ #include "common/py_func_graph_fetcher.h" #define private public #define protected public -#include "pre_activate/ascend/ir_fission/addn_fission.h" +#include "backend/optimizer/ascend/ir_fission/addn_fission.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_bert_fission_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_bert_fission_test.cc index 06895cb0815..f793e0371bf 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_bert_fission_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_bert_fission_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pre_activate/ascend/ir_fission/batch_norm_bert_fission.h" +#include "backend/optimizer/ascend/ir_fission/batch_norm_bert_fission.h" #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission_test.cc index ea4a5c0d5d5..80f30c89386 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission_test.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "pre_activate/ascend/ir_fission/batch_norm_grad_infer_fission.h" +#include "backend/optimizer/ascend/ir_fission/batch_norm_grad_infer_fission.h" #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/bn_grad_split_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/bn_grad_split_test.cc index dc437221f87..f0a5a857b94 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/bn_grad_split_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/bn_grad_split_test.cc @@ -15,17 +15,17 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "ir/tensor.h" #include "debug/anf_ir_dump.h" #include "utils/utils.h" -#include "kernel/kernel_build_info.h" -#include "pre_activate/common/optimizer.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/optimizer/common/optimizer.h" #define private public #define protected public -#include "pre_activate/ascend/ir_fission/bn_grad_split.h" +#include "backend/optimizer/ascend/ir_fission/bn_grad_split.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/bn_split_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/bn_split_test.cc index c5ebc28b480..9f4f31bf827 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/bn_split_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/bn_split_test.cc @@ -15,20 +15,20 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "session/ascend_session.h" -#include "session/anf_runtime_algorithm.h" -#include "pipeline/resource.h" -#include "operator/ops.h" +#include "backend/session/ascend_session.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "pipeline/jit/resource.h" +#include 
"frontend/operator/ops.h" #include "ir/tensor.h" #include "ir/manager.h" #include "debug/anf_ir_dump.h" #include "utils/utils.h" -#include "kernel/kernel_build_info.h" -#include "pre_activate/common/optimizer.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include "backend/optimizer/common/optimizer.h" #define private public #define protected public -#include "pre_activate/ascend/ir_fission/bn_split.h" +#include "backend/optimizer/ascend/ir_fission/bn_split.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/lars_v2_fission_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/lars_v2_fission_test.cc index c0a0cc455eb..c726142e999 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/lars_v2_fission_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/lars_v2_fission_test.cc @@ -16,7 +16,7 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/ascend/ir_fission/lars_v2_fission.h" +#include "backend/optimizer/ascend/ir_fission/lars_v2_fission.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/layer_norm_grad_split_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/layer_norm_grad_split_test.cc index 1df87960e39..4303485d854 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/layer_norm_grad_split_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/layer_norm_grad_split_test.cc @@ -15,17 +15,17 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" #include "ir/tensor.h" #include "debug/anf_ir_dump.h" #include "utils/utils.h" -#include "kernel/kernel_build_info.h" -#include "pre_activate/common/optimizer.h" +#include "backend/kernel_compiler/kernel_build_info.h" +#include 
"backend/optimizer/common/optimizer.h" #define private public #define protected public -#include "pre_activate/ascend/ir_fission/layer_norm_grad_split.h" +#include "backend/optimizer/ascend/ir_fission/layer_norm_grad_split.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/single_batch_norm_fission_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/single_batch_norm_fission_test.cc index b0aa455a0ad..9f84f226780 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/single_batch_norm_fission_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/single_batch_norm_fission_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pre_activate/ascend/ir_fission/single_batch_norm_fission.h" +#include "backend/optimizer/ascend/ir_fission/single_batch_norm_fission.h" #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" #include "debug/anf_ir_dump.h" diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/split_fission_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/split_fission_test.cc index ab70e83480a..30de43be4e4 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/split_fission_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/split_fission_test.cc @@ -18,7 +18,7 @@ #include "common/py_func_graph_fetcher.h" #define private public #define protected public -#include "pre_activate/ascend/ir_fission/split_fission.h" +#include "backend/optimizer/ascend/ir_fission/split_fission.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/tensor_scatter_update_fission_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/tensor_scatter_update_fission_test.cc index faebe0e4a01..1c928b581d3 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/tensor_scatter_update_fission_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/tensor_scatter_update_fission_test.cc @@ -16,7 +16,7 @@ #include "common/backend_common_test.h" 
#include "common/py_func_graph_fetcher.h" -#include "pre_activate/ascend/ir_fission/tensor_scatter_update_fission.h" +#include "backend/optimizer/ascend/ir_fission/tensor_scatter_update_fission.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/topk_split_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/topk_split_test.cc index b09268aa662..2ab614d4c29 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/topk_split_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/topk_split_test.cc @@ -16,13 +16,13 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "device/kernel_info.h" -#include "pre_activate/pass/convert_const_input_to_attr.h" +#include "runtime/device/kernel_info.h" +#include "backend/optimizer/pass/convert_const_input_to_attr.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #define private public #define protected public -#include "pre_activate/ascend/ir_fission/topk_split.h" +#include "backend/optimizer/ascend/ir_fission/topk_split.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/transdata_split_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/transdata_split_test.cc index f2b975a08e7..220e45f10a0 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/transdata_split_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/transdata_split_test.cc @@ -16,16 +16,16 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "device/kernel_info.h" -#include "session/anf_runtime_algorithm.h" -#include "kernel/oplib/oplib.h" +#include "runtime/device/kernel_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/oplib/oplib.h" #include "debug/anf_ir_dump.h" #include "utils/context/ms_context.h" #define private public #define protected 
public -#include "pre_activate/ascend/format_type/insert_trans_op.h" -#include "pre_activate/ascend/ir_fission/transdata_split.h" +#include "backend/optimizer/ascend/format_type/insert_trans_op.h" +#include "backend/optimizer/ascend/ir_fission/transdata_split.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/adam_apply_one_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/adam_apply_one_fusion_test.cc index c2ee7b6519b..27598640379 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/adam_apply_one_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/adam_apply_one_fusion_test.cc @@ -15,7 +15,7 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/ascend/ir_fusion/adam_apply_one_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/adam_apply_one_fusion.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule_test.cc index 014e60f5792..78c815bf506 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ir_fusion/adam_apply_one_with_decay_rule.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/add_input_to_output_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/add_input_to_output_test.cc new file mode 100644 index 00000000000..5d42ff70692 --- /dev/null +++ 
b/tests/ut/cpp/pre_activate/ascend/ir_fusion/add_input_to_output_test.cc @@ -0,0 +1,74 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "common/backend_common_test.h" +#include "common/py_func_graph_fetcher.h" +#include "debug/anf_ir_dump.h" + +#define private public +#define protected public +#include "backend/optimizer/ascend/ir_fusion/add_input_to_output.h" +#undef private +#undef protected + +namespace mindspore { +namespace opt { +class TestHWAddInputToOutput : public BackendCommon { + public: + TestHWAddInputToOutput() : getPyFun_("gtest_input.pre_activate.add_input_to_output_test", true) {} + ~TestHWAddInputToOutput() override = default; + + public: + UT::PyFuncGraphFetcher getPyFun_; +}; + +class MockOpFinder : public OpFinder { + public: + MockOpFinder() = default; + ~MockOpFinder() override = default; + int GetOpRegisteredOutputNum(const std::string &op_name) override { return 2; } +}; + +TEST_F(TestHWAddInputToOutput, test_add_input_to_output) { + FuncGraphPtr g = getPyFun_.CallAndParseRet("test_add_input_to_output", "before"); + EXPECT_NE(g, nullptr); + std::vector shp{2, 32, 224, 224}; + auto x_abstract = std::make_shared(kFloat32, shp); + AbstractBasePtrList args_spec_list; + for (size_t i = 0; i < 5; ++i) { + args_spec_list.push_back(x_abstract); + } + auto kg = GetKernelGraph(g, args_spec_list); + EXPECT_NE(kg, nullptr); + auto ret = kg->get_return(); + EXPECT_NE(ret, 
nullptr); + auto make_tuple = ret->input(1); + EXPECT_NE(make_tuple, nullptr); + auto momentum = make_tuple->cast()->input(1); + EXPECT_NE(momentum, nullptr); + EXPECT_NE(momentum->abstract(), nullptr); + EXPECT_FALSE(momentum->abstract()->isa()); + + auto optimizer = std::make_shared(); + auto pm = std::make_shared(); + auto pass = std::make_shared(); + pass->op_finder_ = std::make_shared(); + pm->AddPass(pass); + optimizer->AddPassManager(pm); + (void)optimizer->Optimize(kg); + EXPECT_TRUE(momentum->abstract()->isa()); +} +} // namespace opt +} // namespace mindspore diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/batchnorm_to_bninfer_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/batchnorm_to_bninfer_test.cc index 466cba8e672..d9d0baf7be2 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/batchnorm_to_bninfer_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/batchnorm_to_bninfer_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fusion/batchnorm_to_bninfer.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ir_fusion/batchnorm_to_bninfer.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad_test.cc index d1fc2783acb..1b64e5fd00e 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h" +#include "backend/optimizer/common/optimizer.h" +#include 
"backend/optimizer/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion_test.cc index 0c8bf673918..aa56d79239b 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/clip_by_value_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/clip_by_value_fusion_test.cc index 4160c3a8e44..ac01f9b1dd3 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/clip_by_value_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/clip_by_value_fusion_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fusion/clip_by_value_fusion.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ir_fusion/clip_by_value_fusion.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion_test.cc index 20448578418..be6bd95b02a 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion_test.cc +++ 
b/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ir_fusion/confusion_mul_grad_fusion.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_softmax_grad_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_softmax_grad_test.cc index 05fa2c65df4..068cc0d12e4 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_softmax_grad_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_softmax_grad_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ir_fusion/confusion_softmax_grad_rule.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/derelu_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/derelu_fusion_test.cc index ffa5a42b4d2..663ed309ee3 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/derelu_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/derelu_fusion_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fusion/derelu_fusion.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ir_fusion/derelu_fusion.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git 
a/tests/ut/cpp/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion_test.cc index 597b7b18ff0..f7cbfdc6785 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.h" #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_rule_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_rule_test.cc index 6ea622d0302..64c004ff275 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_rule_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_rule_test.cc @@ -17,7 +17,7 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" #include "debug/anf_ir_dump.h" -#include "pre_activate/ascend/ir_fusion/lamb_next_mv_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_next_mv_rule.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule_test.cc index 36f0321511a..776ce625b7a 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule_test.cc @@ -16,7 +16,7 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_rule.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git 
a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule_test.cc index fbb1f5e9138..bf21649672d 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule_test.cc @@ -16,7 +16,7 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_next_mv_with_decay_v1_rule.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_right_rule_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_right_rule_test.cc index f1ca92c8118..6a7c866ab4e 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_right_rule_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_next_right_rule_test.cc @@ -15,7 +15,7 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/ascend/ir_fusion/lamb_next_right_rule.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_next_right_rule.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion_test.cc index 7a2806162b5..4de2de2700f 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h" +#include "backend/optimizer/common/optimizer.h" +#include 
"backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2_test.cc index 05262e72ab1..5be6195da20 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2_test.cc @@ -17,7 +17,7 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" #include "debug/anf_ir_dump.h" -#include "pre_activate/ascend/ir_fusion/lamb_update_with_lr_v2.h" +#include "backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_v2.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion_test.cc index 44b9b3df693..7392d05b98f 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion_test.cc @@ -15,13 +15,13 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "device/kernel_info.h" +#include "runtime/device/kernel_info.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #define private public #define protected public -#include "pre_activate/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/layer_norm_beta_gamma_backprop_fusion.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/matmul_biasadd_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/matmul_biasadd_fusion_test.cc index c8f97be2905..f67eda97764 100644 --- 
a/tests/ut/cpp/pre_activate/ascend/ir_fusion/matmul_biasadd_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/matmul_biasadd_fusion_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/matmul_biasadd_fusion.h" #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion_test.cc index 114fcf42330..50dfd66f547 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/momentum_lossscale_fusion_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/momentum_lossscale_fusion.h" #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/mul_add_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/mul_add_fusion_test.cc index 87bb21f89a6..b293cdeecb8 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/mul_add_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/mul_add_fusion_test.cc @@ -15,7 +15,7 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/ascend/ir_fusion/mul_add_fusion.h" +#include "backend/optimizer/ascend/ir_fusion/mul_add_fusion.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/mul_addn_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/mul_addn_fusion_test.cc index ab9718d80a6..8ac106f81c8 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/mul_addn_fusion_test.cc +++ 
b/tests/ut/cpp/pre_activate/ascend/ir_fusion/mul_addn_fusion_test.cc @@ -15,7 +15,7 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "mindspore/ccsrc/pre_activate/ascend/ir_fusion/mul_addn_fusion.h" +#include "mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/mul_addn_fusion.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/reshape_transpose_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/reshape_transpose_fusion_test.cc index 59140e91a18..6792f4720ad 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/reshape_transpose_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/reshape_transpose_fusion_test.cc @@ -17,8 +17,8 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/ascend/ir_fusion/reshape_transpose_fusion.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/ascend/ir_fusion/reshape_transpose_fusion.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion_test.cc index 5f02f0e9c17..f6e8a1194c5 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ir_fusion/softmax_grad_ext_fusion.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/square_sum_fusion_test.cc 
b/tests/ut/cpp/pre_activate/ascend/ir_fusion/square_sum_fusion_test.cc index 2dd858a0fca..efe5433d75c 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/square_sum_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/square_sum_fusion_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/ascend/ir_fusion/square_sum_fusion.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/ascend/ir_fusion/square_sum_fusion.h" #include "debug/anf_ir_dump.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/transpose_reshape_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/transpose_reshape_fusion_test.cc index 3290acd42f3..6ec407d2eaf 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/transpose_reshape_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/transpose_reshape_fusion_test.cc @@ -17,8 +17,8 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/ascend/ir_fusion/transpose_reshape_fusion.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/ascend/ir_fusion/transpose_reshape_fusion.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/transpose_transdata_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/transpose_transdata_fusion_test.cc index 98dc9e9efc3..d156959c4c5 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/transpose_transdata_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/transpose_transdata_fusion_test.cc @@ -16,14 +16,14 @@ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "device/kernel_info.h" -#include "session/anf_runtime_algorithm.h" -#include 
"kernel/oplib/oplib.h" +#include "runtime/device/kernel_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/kernel_compiler/oplib/oplib.h" #include "utils/context/ms_context.h" #define private public #define protected public -#include "pre_activate/ascend/format_type/insert_trans_op.h" -#include "pre_activate/ascend/ir_fusion/transpose_transdata_fusion.h" +#include "backend/optimizer/ascend/format_type/insert_trans_op.h" +#include "backend/optimizer/ascend/ir_fusion/transpose_transdata_fusion.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/common/pattern_engine_test.cc b/tests/ut/cpp/pre_activate/common/pattern_engine_test.cc index 7b0e2cc9db8..12030433fce 100644 --- a/tests/ut/cpp/pre_activate/common/pattern_engine_test.cc +++ b/tests/ut/cpp/pre_activate/common/pattern_engine_test.cc @@ -20,8 +20,8 @@ #include #include "common/common_test.h" -#include "pre_activate/common/pattern_engine.h" -#include "pre_activate/common/visit.h" +#include "backend/optimizer/common/pattern_engine.h" +#include "backend/optimizer/common/visit.h" #include "utils/base_ref.h" #include "ir/anf.h" diff --git a/tests/ut/cpp/pre_activate/mem_reuse/kernel_ref_test.cc b/tests/ut/cpp/pre_activate/mem_reuse/kernel_ref_test.cc index 5b237fda587..8b6d3e061a7 100644 --- a/tests/ut/cpp/pre_activate/mem_reuse/kernel_ref_test.cc +++ b/tests/ut/cpp/pre_activate/mem_reuse/kernel_ref_test.cc @@ -18,7 +18,7 @@ #include #include -#include "pre_activate/mem_reuse/kernel_refcount.h" +#include "backend/optimizer/mem_reuse/kernel_refcount.h" #include "utils/utils.h" #include "common/common_test.h" diff --git a/tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_allocator_test.cc b/tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_allocator_test.cc index e0966d2d129..2a6904658e6 100644 --- a/tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_allocator_test.cc +++ b/tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_allocator_test.cc @@ -17,9 +17,9 @@ #include #include #include 
-#include "operator/ops.h" -#include "pre_activate/mem_reuse/mem_reuse.h" -#include "pre_activate/mem_reuse/mem_reuse_allocator.h" +#include "frontend/operator/ops.h" +#include "backend/optimizer/mem_reuse/mem_reuse.h" +#include "backend/optimizer/mem_reuse/mem_reuse_allocator.h" #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" diff --git a/tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_test.cc b/tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_test.cc index a36463d2975..31ae923c0ad 100644 --- a/tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_test.cc +++ b/tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_test.cc @@ -16,19 +16,19 @@ #include #include #include -#include "session/kernel_graph.h" -#include "session/session_basic.h" -#include "session/ascend_session.h" -#include "pre_activate/mem_reuse/kernel_refcount.h" -#include "pre_activate/mem_reuse/mem_reuse_allocator.h" -#include "device/kernel_info.h" -#include "kernel/tbe/tbe_kernel_mod.h" -#include "operator/ops.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/session_basic.h" +#include "backend/session/ascend_session.h" +#include "backend/optimizer/mem_reuse/kernel_refcount.h" +#include "backend/optimizer/mem_reuse/mem_reuse_allocator.h" +#include "runtime/device/kernel_info.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_mod.h" +#include "frontend/operator/ops.h" #include "utils/log_adapter.h" -#include "session/anf_runtime_algorithm.h" +#include "backend/session/anf_runtime_algorithm.h" #include "common/utils.h" -#include "pipeline/resource.h" -#include "pre_activate/mem_reuse/mem_reuse.h" +#include "pipeline/jit/resource.h" +#include "backend/optimizer/mem_reuse/mem_reuse.h" #include "common/common_test.h" #include "common/py_func_graph_fetcher.h" diff --git a/tests/ut/cpp/pre_activate/pass/allreduce_fusion_test.cc b/tests/ut/cpp/pre_activate/pass/allreduce_fusion_test.cc index 69a330614e9..02e1865a82f 100644 --- 
a/tests/ut/cpp/pre_activate/pass/allreduce_fusion_test.cc +++ b/tests/ut/cpp/pre_activate/pass/allreduce_fusion_test.cc @@ -15,16 +15,16 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "ir/tensor.h" #include "ir/manager.h" #include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/pass/communication_op_fusion.h" -#include "pre_activate/common/optimizer.h" -#include "device/kernel_info.h" -#include "pre_activate/common/pass_manager.h" -#include "kernel/kernel_build_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/pass/communication_op_fusion.h" +#include "backend/optimizer/common/optimizer.h" +#include "runtime/device/kernel_info.h" +#include "backend/optimizer/common/pass_manager.h" +#include "backend/kernel_compiler/kernel_build_info.h" #include "utils/utils.h" #include "utils/context/ms_context.h" diff --git a/tests/ut/cpp/pre_activate/pass/common_subexpression_elimination_test.cc b/tests/ut/cpp/pre_activate/pass/common_subexpression_elimination_test.cc index 12c4d35db5c..cfcc34970b3 100644 --- a/tests/ut/cpp/pre_activate/pass/common_subexpression_elimination_test.cc +++ b/tests/ut/cpp/pre_activate/pass/common_subexpression_elimination_test.cc @@ -14,17 +14,17 @@ * limitations under the License. 
*/ #include "common/backend_common_test.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "ir/tensor.h" #include "ir/manager.h" #include "debug/anf_ir_dump.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pass_manager.h" -#include "device/kernel_info.h" -#include "pre_activate/pass/common_subexpression_elimination.h" -#include "kernel/kernel_build_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pass_manager.h" +#include "runtime/device/kernel_info.h" +#include "backend/optimizer/pass/common_subexpression_elimination.h" +#include "backend/kernel_compiler/kernel_build_info.h" #include "utils/utils.h" #include "utils/context/ms_context.h" diff --git a/tests/ut/cpp/pre_activate/pass/const_to_attr_strided_slice_grad_test.cc b/tests/ut/cpp/pre_activate/pass/const_to_attr_strided_slice_grad_test.cc index 8fc709433e0..25e4b3c1110 100644 --- a/tests/ut/cpp/pre_activate/pass/const_to_attr_strided_slice_grad_test.cc +++ b/tests/ut/cpp/pre_activate/pass/const_to_attr_strided_slice_grad_test.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ #include "common/backend_common_test.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "debug/anf_ir_dump.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pass_manager.h" -#include "pre_activate/pass/const_to_attr_strided_slice_grad.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pass_manager.h" +#include "backend/optimizer/pass/const_to_attr_strided_slice_grad.h" #include "utils/utils.h" #include "common/utils.h" diff --git a/tests/ut/cpp/pre_activate/pass/convert_const_input_to_attr_test.cc b/tests/ut/cpp/pre_activate/pass/convert_const_input_to_attr_test.cc index fcb3b19a249..ac3272317a4 100644 --- a/tests/ut/cpp/pre_activate/pass/convert_const_input_to_attr_test.cc +++ b/tests/ut/cpp/pre_activate/pass/convert_const_input_to_attr_test.cc @@ -14,13 +14,13 @@ * limitations under the License. 
*/ #include "common/backend_common_test.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "debug/anf_ir_dump.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pass_manager.h" -#include "pre_activate/pass/convert_const_input_to_attr.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pass_manager.h" +#include "backend/optimizer/pass/convert_const_input_to_attr.h" #include "utils/utils.h" #include "common/utils.h" diff --git a/tests/ut/cpp/pre_activate/pass/convert_const_input_to_tensor_input_test.cc b/tests/ut/cpp/pre_activate/pass/convert_const_input_to_tensor_input_test.cc index 1749e54d94e..5b303d15a5e 100644 --- a/tests/ut/cpp/pre_activate/pass/convert_const_input_to_tensor_input_test.cc +++ b/tests/ut/cpp/pre_activate/pass/convert_const_input_to_tensor_input_test.cc @@ -18,10 +18,10 @@ #include "ir/tensor.h" #include "debug/anf_ir_dump.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pass_manager.h" -#include "pre_activate/pass/convert_const_input_to_tensor_input.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pass_manager.h" +#include "backend/optimizer/pass/convert_const_input_to_tensor_input.h" #include "utils/utils.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/pass/convert_tuple_input_to_dynamic_input_test.cc b/tests/ut/cpp/pre_activate/pass/convert_tuple_input_to_dynamic_input_test.cc index aded376536a..2c1dfc1c6cd 100644 --- a/tests/ut/cpp/pre_activate/pass/convert_tuple_input_to_dynamic_input_test.cc +++ b/tests/ut/cpp/pre_activate/pass/convert_tuple_input_to_dynamic_input_test.cc @@ -18,10 +18,10 @@ #include 
"ir/tensor.h" #include "debug/anf_ir_dump.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pass_manager.h" -#include "pre_activate/pass/convert_tuple_input_to_dynamic_input.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pass_manager.h" +#include "backend/optimizer/pass/convert_tuple_input_to_dynamic_input.h" #include "utils/utils.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/pass/convert_tuple_output_to_maketuple_test.cc b/tests/ut/cpp/pre_activate/pass/convert_tuple_output_to_maketuple_test.cc index eeb01270e29..458c8542188 100644 --- a/tests/ut/cpp/pre_activate/pass/convert_tuple_output_to_maketuple_test.cc +++ b/tests/ut/cpp/pre_activate/pass/convert_tuple_output_to_maketuple_test.cc @@ -18,10 +18,10 @@ #include "ir/tensor.h" #include "debug/anf_ir_dump.h" #include "common/py_func_graph_fetcher.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pass_manager.h" -#include "pre_activate/pass/convert_tuple_output_to_maketuple.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pass_manager.h" +#include "backend/optimizer/pass/convert_tuple_output_to_maketuple.h" #include "utils/utils.h" namespace mindspore { diff --git a/tests/ut/cpp/pre_activate/pass/eliminate_redundant_op_test.cc b/tests/ut/cpp/pre_activate/pass/eliminate_redundant_op_test.cc index 3e431550113..07bef7a0421 100644 --- a/tests/ut/cpp/pre_activate/pass/eliminate_redundant_op_test.cc +++ b/tests/ut/cpp/pre_activate/pass/eliminate_redundant_op_test.cc @@ -15,26 +15,26 @@ */ #include "common/backend_common_test.h" -#include "kernel/kernel.h" -#include "operator/ops.h" +#include "backend/kernel_compiler/kernel.h" +#include 
"frontend/operator/ops.h" #include "ir/tensor.h" #include "ir/manager.h" #include "debug/anf_ir_dump.h" #include "common/py_func_graph_fetcher.h" -// #include "device/optimizer/pass/insert_trans_op.h" -#include "pre_activate/ascend/format_type/insert_cast.h" -#include "pre_activate/pass/eliminate_redundant_op.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/common/pass_manager.h" +// #include "runtime/device/optimizer/pass/insert_trans_op.h" +#include "backend/optimizer/ascend/format_type/insert_cast.h" +#include "backend/optimizer/pass/eliminate_redundant_op.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/common/pass_manager.h" #include "utils/utils.h" #include "utils/context/ms_context.h" -#include "session/anf_runtime_algorithm.h" -#include "device/kernel_info.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/kernel_info.h" #include "utils/context/ms_context.h" #define private public #define protected public -#include "pre_activate/ascend/format_type/insert_trans_op.h" +#include "backend/optimizer/ascend/format_type/insert_trans_op.h" #undef private #undef protected diff --git a/tests/ut/cpp/pre_activate/pass/getitem_tuple_test.cc b/tests/ut/cpp/pre_activate/pass/getitem_tuple_test.cc index b172e1b3519..555dd954265 100644 --- a/tests/ut/cpp/pre_activate/pass/getitem_tuple_test.cc +++ b/tests/ut/cpp/pre_activate/pass/getitem_tuple_test.cc @@ -15,14 +15,14 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "session/ascend_session.h" -#include "pipeline/resource.h" -#include "operator/ops.h" +#include "backend/session/ascend_session.h" +#include "pipeline/jit/resource.h" +#include "frontend/operator/ops.h" #include "ir/manager.h" #include "debug/anf_ir_dump.h" #include "utils/utils.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/pass/getitem_tuple.h" +#include "backend/optimizer/common/optimizer.h" +#include 
"backend/optimizer/pass/getitem_tuple.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pre_activate/pass/optimize_dependence_test.cc b/tests/ut/cpp/pre_activate/pass/optimize_dependence_test.cc index 04461e66025..f9cfe273bc6 100644 --- a/tests/ut/cpp/pre_activate/pass/optimize_dependence_test.cc +++ b/tests/ut/cpp/pre_activate/pass/optimize_dependence_test.cc @@ -15,8 +15,8 @@ */ #include "common/backend_common_test.h" #include "common/py_func_graph_fetcher.h" -#include "pre_activate/common/optimizer.h" -#include "pre_activate/pass/optimize_dependence.h" +#include "backend/optimizer/common/optimizer.h" +#include "backend/optimizer/pass/optimize_dependence.h" namespace mindspore { namespace opt { diff --git a/tests/ut/cpp/pynative/pynative_execute_test.cc b/tests/ut/cpp/pynative/pynative_execute_test.cc index a0d1516b58d..c5f25ca4848 100644 --- a/tests/ut/cpp/pynative/pynative_execute_test.cc +++ b/tests/ut/cpp/pynative/pynative_execute_test.cc @@ -16,10 +16,10 @@ #include #include #include "common/common_test.h" -#include "pipeline/parse/python_adapter.h" -#include "pipeline/parse/data_converter.h" -#include "operator/ops.h" -#include "pynative/pynative_execute.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "pipeline/jit/parse/data_converter.h" +#include "frontend/operator/ops.h" +#include "pipeline/pynative/pynative_execute.h" #include "utils/context/ms_context.h" #include "utils/utils.h" diff --git a/tests/ut/cpp/python_input/gtest_input/optimizer/ad/ad_test.py b/tests/ut/cpp/python_input/gtest_input/optimizer/ad/ad_test.py index e38c61f16e8..bcfa077ea5e 100644 --- a/tests/ut/cpp/python_input/gtest_input/optimizer/ad/ad_test.py +++ b/tests/ut/cpp/python_input/gtest_input/optimizer/ad/ad_test.py @@ -17,8 +17,8 @@ import numpy as np import mindspore as ms from mindspore.common.tensor import Tensor -from mindspore.model_zoo.resnet import resnet50 from mindspore.ops import Primitive +from tests.ut.python.model.resnet import resnet50 
scala_add = Primitive('scalar_add') diff --git a/tests/ut/cpp/python_input/gtest_input/pipeline/parse/parser_integrate.py b/tests/ut/cpp/python_input/gtest_input/pipeline/parse/parser_integrate.py index fa5b1b90558..28bded64016 100644 --- a/tests/ut/cpp/python_input/gtest_input/pipeline/parse/parser_integrate.py +++ b/tests/ut/cpp/python_input/gtest_input/pipeline/parse/parser_integrate.py @@ -22,9 +22,9 @@ from mindspore.common import dtype from mindspore.common.api import ms_function, _executor from mindspore.common.parameter import Parameter from mindspore.common.tensor import Tensor -from mindspore.model_zoo.resnet import resnet50 from mindspore.ops import functional as F from mindspore.train.model import Model +from tests.ut.python.model.resnet import resnet50 def test_high_order_function(a): diff --git a/tests/ut/cpp/python_input/gtest_input/pre_activate/add_input_to_output_test.py b/tests/ut/cpp/python_input/gtest_input/pre_activate/add_input_to_output_test.py new file mode 100644 index 00000000000..4d4fa1fe963 --- /dev/null +++ b/tests/ut/cpp/python_input/gtest_input/pre_activate/add_input_to_output_test.py @@ -0,0 +1,39 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +from mindspore.ops import operations as P + +ApplyMomentum = P.ApplyMomentum() + + +class FnDict: + def __init__(self): + self.fnDict = {} + + def __call__(self, fn): + self.fnDict[fn.__name__] = fn + + def __getitem__(self, name): + return self.fnDict[name] + + +def test_add_input_to_output(tag): + fns = FnDict() + + @fns + def before(input0, input1, input2, input3, input4): + return ApplyMomentum(input0, input1, input2, input3, input4) + + return fns[tag] diff --git a/tests/ut/cpp/session/anf_runtime_algorithm_test.cc b/tests/ut/cpp/session/anf_runtime_algorithm_test.cc index 4c94cdde579..ac38e5427e4 100644 --- a/tests/ut/cpp/session/anf_runtime_algorithm_test.cc +++ b/tests/ut/cpp/session/anf_runtime_algorithm_test.cc @@ -15,12 +15,12 @@ */ #include "common/common_test.h" -#include "ir/param_value_py.h" -#include "operator/ops.h" -#include "session/kernel_graph.h" -#include "session/anf_runtime_algorithm.h" -#include "mindspore/ccsrc/device/kernel_info.h" -#include "mindspore/ccsrc/device/ascend/ascend_device_address.h" +#include "ir/param_value.h" +#include "frontend/operator/ops.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "mindspore/ccsrc/runtime/device/kernel_info.h" +#include "mindspore/ccsrc/runtime/device/ascend/ascend_device_address.h" #include "utils/utils.h" namespace mindspore { @@ -255,7 +255,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetOutputFormat) { AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat32, kNumberTypeFloat32}, {shape, shape}, add.get()); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; builder.SetOutputsDeviceType({kFloat32->type_id(), kFloat16->type_id()}); @@ -274,7 +274,7 @@ 
TEST_F(AnfRuntimeAlgorithmTest, GetInputFormat) { auto add = kernel_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; builder.SetInputsDeviceType({kFloat32->type_id(), kFloat16->type_id()}); @@ -293,7 +293,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetPrevNodeOutputFormat) { auto pre_add = kernel_graph->NewCNode(pre_node_inputs); MS_EXCEPTION_IF_NULL(pre_add); pre_add->set_kernel_info(std::make_shared()); - auto d_kernel_info = pre_add->kernel_info(); + auto d_kernel_info = dynamic_cast(pre_add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; builder.SetOutputsDeviceType({kFloat32->type_id()}); @@ -373,7 +373,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetOutputDeviceShape) { MS_EXCEPTION_IF_NULL(add); add->set_abstract(tuple_abstract); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; builder.SetOutputsFormat({kOpFormat_NCHW, kOpFormat_NCHW, kOpFormat_NHWC, kOpFormat_FRAC_NZ}); @@ -404,7 +404,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetInputDeviceShape) { auto add = kernel_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW, kOpFormat_NHWC}); @@ -457,7 +457,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetOutputDeviceDataTypeTest) { auto add = kernel_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = 
dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; builder.SetOutputsDeviceType({kFloat32->type_id()}); @@ -474,7 +474,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetInputDeviceDataTypeTest) { auto add = kernel_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; builder.SetInputsDeviceType({kFloat32->type_id(), kFloat16->type_id()}); @@ -492,7 +492,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetPrevNodeOutputDeviceDataType) { auto pre_add = kernel_graph->NewCNode(pre_add_inputs); MS_EXCEPTION_IF_NULL(pre_add); pre_add->set_kernel_info(std::make_shared()); - auto d_kernel_info = pre_add->kernel_info(); + auto d_kernel_info = dynamic_cast(pre_add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; builder.SetOutputsDeviceType({kFloat32->type_id()}); @@ -513,7 +513,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetOutputAddr) { auto add = kernel_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); int *addr = nullptr; auto device_address = std::make_shared(addr, 1); @@ -528,7 +528,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetPrevNodeOutputAddr) { auto pre_add = kernel_graph->NewCNode(pre_add_inputs); MS_EXCEPTION_IF_NULL(pre_add); pre_add->set_kernel_info(std::make_shared()); - auto d_kernel_info = pre_add->kernel_info(); + auto d_kernel_info = dynamic_cast(pre_add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); int *addr = nullptr; auto device_address = std::make_shared(addr, 1); @@ -561,7 +561,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetWorkspaceAddr) { auto add = kernel_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(add); 
add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); int *addr = nullptr; auto device_address = std::make_shared(addr, 1); @@ -643,7 +643,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetKernelType) { auto add = kernel_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; builder.SetKernelType(AKG_KERNEL); @@ -659,7 +659,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetProcessor) { auto add = kernel_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; builder.SetProcessor(kernel::AICORE); @@ -675,7 +675,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetFusionType) { auto add = kernel_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); KernelBuildInfoBuilder builder; builder.SetFusionType(kernel::CONVLUTION); @@ -703,7 +703,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetKernelMod) { auto add = kernel_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); d_kernel_info->set_kernel_mod(nullptr); EXPECT_EQ(AnfAlgo::GetKernelMod(add), nullptr); @@ -764,10 +764,9 @@ TEST_F(AnfRuntimeAlgorithmTest, IsRealCNodeKernel) { TEST_F(AnfRuntimeAlgorithmTest, IsParameterWeight) { auto kernel_graph = std::make_shared(); - py::object 
obj; auto parameter_node = kernel_graph->add_parameter(); MS_EXCEPTION_IF_NULL(parameter_node); - auto param_value_new = std::make_shared(obj); + auto param_value_new = std::make_shared(); parameter_node->set_default_param(param_value_new); EXPECT_TRUE(AnfAlgo::IsParameterWeight(parameter_node)); EXPECT_THROW(AnfAlgo::IsParameterWeight(nullptr), std::runtime_error); @@ -780,7 +779,7 @@ TEST_F(AnfRuntimeAlgorithmTest, GetStreamId) { auto add = kernel_graph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(add); add->set_kernel_info(std::make_shared()); - auto d_kernel_info = add->kernel_info(); + auto d_kernel_info = dynamic_cast(add->kernel_info()); MS_EXCEPTION_IF_NULL(d_kernel_info); d_kernel_info->set_stream_id(0); EXPECT_EQ(AnfAlgo::GetStreamId(add), 0); diff --git a/tests/ut/cpp/session/kernel_graph_test.cc b/tests/ut/cpp/session/kernel_graph_test.cc index 75e653c26c2..f24036b4aa2 100644 --- a/tests/ut/cpp/session/kernel_graph_test.cc +++ b/tests/ut/cpp/session/kernel_graph_test.cc @@ -15,11 +15,11 @@ */ #include "common/common_test.h" -#include "ir/param_value_py.h" -#include "operator/ops.h" -#include "session/kernel_graph.h" -#include "session/anf_runtime_algorithm.h" -#include "mindspore/ccsrc/device/kernel_info.h" +#include "ir/param_value.h" +#include "frontend/operator/ops.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "mindspore/ccsrc/runtime/device/kernel_info.h" #include "utils/utils.h" namespace mindspore { @@ -42,7 +42,7 @@ TEST_F(KernelGraphTest, NewValueNode) { auto x_abstract = std::make_shared(kFloat32, shape); add_value->set_abstract(x_abstract); add_value->set_kernel_info(std::make_shared()); - auto mutable_kernel_info = add_value->kernel_info(); + auto mutable_kernel_info = dynamic_cast(add_value->kernel_info()); MS_EXCEPTION_IF_NULL(mutable_kernel_info); std::shared_ptr builder = std::make_shared(); builder->SetOutputsFormat({kOpFormat_FRAC_Z}); @@ -82,8 +82,7 @@ TEST_F(KernelGraphTest, 
NewParameter) { // test weight parameter node as input auto weight_parameter_node = anf_graph->add_parameter(); MS_EXCEPTION_IF_NULL(weight_parameter_node); - py::object obj; - auto param_value_new = std::make_shared(obj); + auto param_value_new = std::make_shared(); weight_parameter_node->set_default_param(param_value_new); weight_parameter_node->set_abstract(x_abstract); auto new_weight_parameter_node = kernel_graph->NewParameter(weight_parameter_node); diff --git a/tests/ut/cpp/session/session_basic_test.cc b/tests/ut/cpp/session/session_basic_test.cc index 1a7ca680655..c438c92b520 100644 --- a/tests/ut/cpp/session/session_basic_test.cc +++ b/tests/ut/cpp/session/session_basic_test.cc @@ -15,10 +15,10 @@ */ #include "common/common_test.h" -#include "operator/ops.h" -#include "session/ascend_session.h" -#include "session/kernel_graph.h" -#include "session/anf_runtime_algorithm.h" +#include "frontend/operator/ops.h" +#include "backend/session/ascend_session.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" #include "utils/utils.h" namespace mindspore { diff --git a/tests/ut/cpp/stub/aicpu/aicpu_stub.cc b/tests/ut/cpp/stub/aicpu/aicpu_stub.cc index 78ada6de18c..5516d1fdc85 100644 --- a/tests/ut/cpp/stub/aicpu/aicpu_stub.cc +++ b/tests/ut/cpp/stub/aicpu/aicpu_stub.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/kernel.h" +#include "backend/kernel_compiler/kernel.h" namespace mindspore { namespace kernel { diff --git a/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc b/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc index a3a991247cc..234ffdaf6b7 100644 --- a/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc +++ b/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc @@ -15,7 +15,7 @@ */ #include #include "framework/ge_runtime/model_runner.h" -#include "device/ascend/tasksink/runtime_utils.h" +#include "runtime/device/ascend/tasksink/runtime_utils.h" namespace ge { namespace model_runner { @@ -32,6 +32,8 @@ bool ModelRunner::LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint bool ModelRunner::UnloadModel(uint32_t model_id) { return true; } +bool ModelRunner::LoadModelComplete(uint32_t model_id) { return true; } + bool ModelRunner::RunModel(uint32_t model_id, const ge::InputData &input_data, ge::OutputData *output_data) { return true; } @@ -45,6 +47,11 @@ const std::vector &ModelRunner::GetStreamIdList(uint32_t model_id) con static std::vector stream_id_list; return stream_id_list; } + +const std::map> &ModelRunner::GetRuntimeInfoMap(uint32_t model_id) const { + static std::map> runtime_info_map; + return runtime_info_map; +} } // namespace model_runner } // namespace ge diff --git a/tests/ut/cpp/stub/kernel/kernel_fusion_stub.cc b/tests/ut/cpp/stub/kernel/kernel_fusion_stub.cc index ba642dfe189..87ab543c7c1 100755 --- a/tests/ut/cpp/stub/kernel/kernel_fusion_stub.cc +++ b/tests/ut/cpp/stub/kernel/kernel_fusion_stub.cc @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "kernel/kernel_fusion.h" -#include "kernel/tbe/tbe_kernel_mod.h" +#include "backend/kernel_compiler/kernel_fusion.h" +#include "backend/kernel_compiler/tbe/tbe_kernel_mod.h" #include "common/utils.h" namespace mindspore { diff --git a/tests/ut/cpp/stub/parallel_strategy_checkpoint/parallel_strategy_checkpoint_stub.cc b/tests/ut/cpp/stub/parallel_strategy_checkpoint/parallel_strategy_checkpoint_stub.cc index 43d0dd4b3fb..f6f2f45092c 100644 --- a/tests/ut/cpp/stub/parallel_strategy_checkpoint/parallel_strategy_checkpoint_stub.cc +++ b/tests/ut/cpp/stub/parallel_strategy_checkpoint/parallel_strategy_checkpoint_stub.cc @@ -15,7 +15,7 @@ */ #include #include -#include "parallel/strategy_checkpoint/parallel_strategy_checkpoint.h" +#include "frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h" #include "utils/log_adapter.h" namespace mindspore { diff --git a/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc b/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc index a6ec3a50b5c..85470e2315c 100755 --- a/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc +++ b/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc @@ -13,10 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "device/ascend/ascend_stream_assign.h" -#include "device/ascend/ascend_label_assign.h" -#include "device/ascend/tasksink/task_generator.h" -#include "device/kernel_adjust.h" +#include "runtime/device/ascend/ascend_stream_assign.h" +#include "runtime/device/ascend/ascend_label_assign.h" +#include "runtime/device/kernel_adjust.h" namespace mindspore { namespace device { @@ -31,13 +30,6 @@ void AscendStreamAssign::AssignStream(const NotNull &graph_ptr) void AscendStreamAssign::GetWaitStreams(vector *wait_active_stream_list) { return; } void AscendStreamAssign::GetHcomStreams(std::vector *streams) { return; } - -namespace tasksink { -bool TaskGenerator::GenTasks(const std::vector &anf_node_list, std::vector *const task_info_list, - uint32_t graph_id) { - return true; -} -} // namespace tasksink } // namespace ascend void KernelAdjust::InsertSwitchLoop(const std::shared_ptr &kernel_graph_ptr) { return; } bool KernelAdjust::StepLoadCtrlInputs(const std::shared_ptr &kernel_graph_ptr) { return true; } diff --git a/tests/ut/cpp/stub/tasksink/task_sink_stub.cc b/tests/ut/cpp/stub/tasksink/task_sink_stub.cc new file mode 100644 index 00000000000..0b12a3862c1 --- /dev/null +++ b/tests/ut/cpp/stub/tasksink/task_sink_stub.cc @@ -0,0 +1,30 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "runtime/device/ascend/tasksink/task_generator.h" + +namespace mindspore { +namespace device { +namespace ascend { +namespace tasksink { +bool TaskGenerator::GenTasks(const std::vector &anf_node_list, std::vector *const task_info_list, + uint32_t graph_id) { + return true; +} +} // namespace tasksink +} // namespace ascend +} // namespace device +} // namespace mindspore \ No newline at end of file diff --git a/tests/ut/cpp/transform/convert_test.cc b/tests/ut/cpp/transform/convert_test.cc index f8f48920e00..6902f7d90d9 100644 --- a/tests/ut/cpp/transform/convert_test.cc +++ b/tests/ut/cpp/transform/convert_test.cc @@ -20,16 +20,16 @@ #include "transform/transform_base_test.h" #include "common/py_func_graph_fetcher.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "debug/draw.h" #include "debug/anf_ir_dump.h" -#include "pipeline/static_analysis/prim.h" -#include "operator/ops.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "frontend/operator/ops.h" #include "common/common_test.h" #define private public -#include "transform/types.h" -#include "transform/convert.h" +#include "transform/graph_ir/types.h" +#include "transform/graph_ir/convert.h" #include "securec/include/securec.h" #include "utils/utils.h" using std::cout; diff --git a/tests/ut/cpp/transform/graph_builder_test.cc b/tests/ut/cpp/transform/graph_builder_test.cc index e92463e2dc0..e4d72b33cb2 100644 --- a/tests/ut/cpp/transform/graph_builder_test.cc +++ b/tests/ut/cpp/transform/graph_builder_test.cc @@ -25,8 +25,8 @@ #endif #define private public -#include "transform/graph_builder.h" -#include "transform/df_graph_manager.h" +#include "transform/graph_ir/graph_builder.h" +#include "transform/graph_ir/df_graph_manager.h" using UT::Common; diff --git a/tests/ut/cpp/transform/graph_manager_test.cc b/tests/ut/cpp/transform/graph_manager_test.cc index 699f81ca4c8..9e55e1725bb 100644 --- a/tests/ut/cpp/transform/graph_manager_test.cc +++ 
b/tests/ut/cpp/transform/graph_manager_test.cc @@ -25,7 +25,7 @@ #endif #define private public -#include "transform/df_graph_manager.h" +#include "transform/graph_ir/df_graph_manager.h" using UT::Common; diff --git a/tests/ut/cpp/transform/graph_runner_test.cc b/tests/ut/cpp/transform/graph_runner_test.cc index 1b87cea464c..b91ec959d25 100644 --- a/tests/ut/cpp/transform/graph_runner_test.cc +++ b/tests/ut/cpp/transform/graph_runner_test.cc @@ -21,10 +21,10 @@ #include "ir/tensor_py.h" #include "transform/transform_base_test.h" #include "common/py_func_graph_fetcher.h" -#include "pipeline/static_analysis/static_analysis.h" -#include "operator/ops.h" -#include "transform/df_graph_manager.h" -#include "transform/convert.h" +#include "pipeline/jit/static_analysis/static_analysis.h" +#include "frontend/operator/ops.h" +#include "transform/graph_ir/df_graph_manager.h" +#include "transform/graph_ir/convert.h" #include "utils/utils.h" #ifdef OPEN_SOURCE @@ -34,7 +34,7 @@ #endif #define private public -#include "transform/graph_runner.h" +#include "transform/graph_ir/graph_runner.h" using mindspore::tensor::TensorPy; diff --git a/tests/ut/cpp/transform/op_adapter_test.cc b/tests/ut/cpp/transform/op_adapter_test.cc index 254452bb428..2aa6ba37e33 100644 --- a/tests/ut/cpp/transform/op_adapter_test.cc +++ b/tests/ut/cpp/transform/op_adapter_test.cc @@ -19,9 +19,9 @@ #include "common/common_test.h" -#include "transform/op_declare.h" +#include "transform/graph_ir/op_declare.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "./common.h" using std::cout; diff --git a/tests/ut/cpp/transform/transform_base_test.h b/tests/ut/cpp/transform/transform_base_test.h index 92147dfbbf0..4886b25748b 100644 --- a/tests/ut/cpp/transform/transform_base_test.h +++ b/tests/ut/cpp/transform/transform_base_test.h @@ -20,11 +20,11 @@ #include #include #include -#include "transform/util.h" +#include "transform/graph_ir/util.h" #include "ir/tensor.h" #include 
"common/common_test.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "./common.h" #include "graph/tensor.h" diff --git a/tests/ut/cpp/utils/any_test.cc b/tests/ut/cpp/utils/any_test.cc index d11831d6028..8a49017d955 100644 --- a/tests/ut/cpp/utils/any_test.cc +++ b/tests/ut/cpp/utils/any_test.cc @@ -20,7 +20,7 @@ #include #include "common/common_test.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "utils/any.h" #include "utils/misc.h" diff --git a/tests/ut/cpp/utils/callback_test.cc b/tests/ut/cpp/utils/callback_test.cc index c63f68f000a..0a4ffb81908 100644 --- a/tests/ut/cpp/utils/callback_test.cc +++ b/tests/ut/cpp/utils/callback_test.cc @@ -18,9 +18,9 @@ #include "pybind11/pybind11.h" #include "utils/callbacks.h" #include "common/common_test.h" -#include "pipeline/pipeline.h" -#include "pipeline/parse/python_adapter.h" -#include "transform/df_graph_manager.h" +#include "pipeline/jit/pipeline.h" +#include "pipeline/jit/parse/python_adapter.h" +#include "transform/graph_ir/df_graph_manager.h" #include "debug/draw.h" #ifdef ENABLE_GE #include "utils/callbacks_ge.h" diff --git a/tests/ut/cpp/utils/graph_utils_test.cc b/tests/ut/cpp/utils/graph_utils_test.cc index ce5a4318d3a..35fa9cdc6ae 100644 --- a/tests/ut/cpp/utils/graph_utils_test.cc +++ b/tests/ut/cpp/utils/graph_utils_test.cc @@ -24,8 +24,8 @@ #include "ir/anf.h" #include "utils/graph_utils.h" -#include "pipeline/parse/parse_base.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse_base.h" +#include "pipeline/jit/parse/parse.h" namespace mindspore { diff --git a/tests/ut/cpp/utils/ir_import_test.cc b/tests/ut/cpp/utils/ir_import_test.cc index 5e7db98a38d..374c36b4e87 100644 --- a/tests/ut/cpp/utils/ir_import_test.cc +++ b/tests/ut/cpp/utils/ir_import_test.cc @@ -19,10 +19,10 @@ #include "utils/log_adapter.h" #include "debug/anf_ir_utils.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include 
"ir/manager.h" -#include "pipeline/static_analysis/prim.h" -#include "operator/ops.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "frontend/operator/ops.h" namespace mindspore { class TestIrImporter : public UT::Common { diff --git a/tests/ut/cpp/utils/symbolic_test.cc b/tests/ut/cpp/utils/symbolic_test.cc index f259b62d6b9..c0abd388d5f 100644 --- a/tests/ut/cpp/utils/symbolic_test.cc +++ b/tests/ut/cpp/utils/symbolic_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ #include "common/common_test.h" -#include "pipeline/static_analysis/static_analysis.h" +#include "pipeline/jit/static_analysis/static_analysis.h" #include "utils/symbolic.h" using std::cout; diff --git a/tests/ut/cpp/utils/validator_test.cc b/tests/ut/cpp/utils/validator_test.cc index 8eef44bde58..93334d76641 100644 --- a/tests/ut/cpp/utils/validator_test.cc +++ b/tests/ut/cpp/utils/validator_test.cc @@ -18,11 +18,11 @@ #include "common/common_test.h" #include "utils/log_adapter.h" -#include "pipeline/validator.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/validator.h" +#include "pipeline/jit/parse/parse.h" #include "ir/manager.h" -#include "pipeline/static_analysis/prim.h" -#include "operator/ops.h" +#include "pipeline/jit/static_analysis/prim.h" +#include "frontend/operator/ops.h" namespace mindspore { namespace validator { diff --git a/tests/ut/cpp/vm/segment_runner_test.cc b/tests/ut/cpp/vm/segment_runner_test.cc index b9bc552d905..c83b1b3434d 100644 --- a/tests/ut/cpp/vm/segment_runner_test.cc +++ b/tests/ut/cpp/vm/segment_runner_test.cc @@ -20,11 +20,11 @@ #include "ir/manager.h" #include "utils/log_adapter.h" #include "ir/func_graph_cloner.h" -#include "pipeline/parse/parse.h" +#include "pipeline/jit/parse/parse.h" #include "utils/graph_utils.h" -#include "pipeline/resource.h" +#include "pipeline/jit/resource.h" #include "debug/draw.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "vm/segment_runner.h" #include "vm/transform.h" 
#include "ir/tensor.h" diff --git a/tests/ut/cpp/vm/vm_test.cc b/tests/ut/cpp/vm/vm_test.cc index 04633043afd..9168d408c3d 100644 --- a/tests/ut/cpp/vm/vm_test.cc +++ b/tests/ut/cpp/vm/vm_test.cc @@ -15,7 +15,7 @@ */ #include "vm/vm.h" #include "common/common_test.h" -#include "operator/ops.h" +#include "frontend/operator/ops.h" #include "vm/backend.h" namespace mindspore { diff --git a/tests/ut/data/dataset/golden/bounding_box_augment_crop_c_result.npz b/tests/ut/data/dataset/golden/bounding_box_augment_crop_c_result.npz index e4e92210d7a..14ddc166e26 100644 Binary files a/tests/ut/data/dataset/golden/bounding_box_augment_crop_c_result.npz and b/tests/ut/data/dataset/golden/bounding_box_augment_crop_c_result.npz differ diff --git a/tests/ut/data/dataset/golden/bounding_box_augment_rotation_c_result.npz b/tests/ut/data/dataset/golden/bounding_box_augment_rotation_c_result.npz index 8cc7e15e31b..07ae4e5892e 100644 Binary files a/tests/ut/data/dataset/golden/bounding_box_augment_rotation_c_result.npz and b/tests/ut/data/dataset/golden/bounding_box_augment_rotation_c_result.npz differ diff --git a/tests/ut/data/dataset/golden/bounding_box_augment_valid_edge_c_result.npz b/tests/ut/data/dataset/golden/bounding_box_augment_valid_edge_c_result.npz index dafea520fe9..a72643457b8 100644 Binary files a/tests/ut/data/dataset/golden/bounding_box_augment_valid_edge_c_result.npz and b/tests/ut/data/dataset/golden/bounding_box_augment_valid_edge_c_result.npz differ diff --git a/tests/ut/data/dataset/golden/bounding_box_augment_valid_ratio_c_result.npz b/tests/ut/data/dataset/golden/bounding_box_augment_valid_ratio_c_result.npz index 71e58406acb..9a6ae1cb990 100644 Binary files a/tests/ut/data/dataset/golden/bounding_box_augment_valid_ratio_c_result.npz and b/tests/ut/data/dataset/golden/bounding_box_augment_valid_ratio_c_result.npz differ diff --git a/tests/ut/data/dataset/golden/cache_map_01_result.npz b/tests/ut/data/dataset/golden/cache_map_01_result.npz new file mode 100644 
index 00000000000..7cff9ded889 Binary files /dev/null and b/tests/ut/data/dataset/golden/cache_map_01_result.npz differ diff --git a/tests/ut/data/dataset/golden/cache_map_02_result.npz b/tests/ut/data/dataset/golden/cache_map_02_result.npz new file mode 100644 index 00000000000..7cff9ded889 Binary files /dev/null and b/tests/ut/data/dataset/golden/cache_map_02_result.npz differ diff --git a/tests/ut/data/dataset/golden/random_crop_with_bbox_01_c_result.npz b/tests/ut/data/dataset/golden/random_crop_with_bbox_01_c_result.npz index 0c220fd09d2..bb33f1becee 100644 Binary files a/tests/ut/data/dataset/golden/random_crop_with_bbox_01_c_result.npz and b/tests/ut/data/dataset/golden/random_crop_with_bbox_01_c_result.npz differ diff --git a/tests/ut/data/dataset/golden/random_horizontal_flip_with_bbox_01_c_result.npz b/tests/ut/data/dataset/golden/random_horizontal_flip_with_bbox_01_c_result.npz index d360bb98ec7..416223ff4de 100644 Binary files a/tests/ut/data/dataset/golden/random_horizontal_flip_with_bbox_01_c_result.npz and b/tests/ut/data/dataset/golden/random_horizontal_flip_with_bbox_01_c_result.npz differ diff --git a/tests/ut/data/dataset/golden/random_resize_with_bbox_op_01_c_coco_result.npz b/tests/ut/data/dataset/golden/random_resize_with_bbox_op_01_c_coco_result.npz new file mode 100644 index 00000000000..db62d6509ef Binary files /dev/null and b/tests/ut/data/dataset/golden/random_resize_with_bbox_op_01_c_coco_result.npz differ diff --git a/tests/ut/data/dataset/golden/random_resize_with_bbox_op_01_c_voc_result.npz b/tests/ut/data/dataset/golden/random_resize_with_bbox_op_01_c_voc_result.npz new file mode 100644 index 00000000000..75f4447ded2 Binary files /dev/null and b/tests/ut/data/dataset/golden/random_resize_with_bbox_op_01_c_voc_result.npz differ diff --git a/tests/ut/data/dataset/golden/random_resized_crop_with_bbox_01_c_result.npz b/tests/ut/data/dataset/golden/random_resized_crop_with_bbox_01_c_result.npz index a909cbe88c5..aa9778bd39c 100644 Binary 
files a/tests/ut/data/dataset/golden/random_resized_crop_with_bbox_01_c_result.npz and b/tests/ut/data/dataset/golden/random_resized_crop_with_bbox_01_c_result.npz differ diff --git a/tests/ut/data/dataset/golden/random_vertical_flip_with_bbox_01_c_result.npz b/tests/ut/data/dataset/golden/random_vertical_flip_with_bbox_01_c_result.npz index aba6fe97b02..e0e0eb28232 100644 Binary files a/tests/ut/data/dataset/golden/random_vertical_flip_with_bbox_01_c_result.npz and b/tests/ut/data/dataset/golden/random_vertical_flip_with_bbox_01_c_result.npz differ diff --git a/tests/ut/data/dataset/golden/repeat_list_result.npz b/tests/ut/data/dataset/golden/repeat_list_result.npz index c0240c6e213..883ac58be8a 100644 Binary files a/tests/ut/data/dataset/golden/repeat_list_result.npz and b/tests/ut/data/dataset/golden/repeat_list_result.npz differ diff --git a/tests/ut/data/dataset/golden/repeat_result.npz b/tests/ut/data/dataset/golden/repeat_result.npz index 73b0a24b201..2df787cef88 100644 Binary files a/tests/ut/data/dataset/golden/repeat_result.npz and b/tests/ut/data/dataset/golden/repeat_result.npz differ diff --git a/tests/ut/data/dataset/golden/resize_with_bbox_op_01_c_coco_result.npz b/tests/ut/data/dataset/golden/resize_with_bbox_op_01_c_coco_result.npz new file mode 100644 index 00000000000..999c15e5f31 Binary files /dev/null and b/tests/ut/data/dataset/golden/resize_with_bbox_op_01_c_coco_result.npz differ diff --git a/tests/ut/data/dataset/golden/resize_with_bbox_op_01_c_voc_result.npz b/tests/ut/data/dataset/golden/resize_with_bbox_op_01_c_voc_result.npz new file mode 100644 index 00000000000..ca648849377 Binary files /dev/null and b/tests/ut/data/dataset/golden/resize_with_bbox_op_01_c_voc_result.npz differ diff --git a/tests/ut/data/dataset/golden/tf_file_no_schema.npz b/tests/ut/data/dataset/golden/tf_file_no_schema.npz deleted file mode 100644 index b8239985215..00000000000 Binary files a/tests/ut/data/dataset/golden/tf_file_no_schema.npz and /dev/null differ 
diff --git a/tests/ut/data/dataset/golden/tf_file_padBytes10.npz b/tests/ut/data/dataset/golden/tf_file_padBytes10.npz deleted file mode 100644 index e3d6d9934bc..00000000000 Binary files a/tests/ut/data/dataset/golden/tf_file_padBytes10.npz and /dev/null differ diff --git a/tests/ut/data/dataset/golden/tfreader_result.npz b/tests/ut/data/dataset/golden/tfreader_result.npz deleted file mode 100644 index 10cad9f2b01..00000000000 Binary files a/tests/ut/data/dataset/golden/tfreader_result.npz and /dev/null differ diff --git a/tests/ut/data/dataset/golden/tfrecord_files_basic.npz b/tests/ut/data/dataset/golden/tfrecord_files_basic.npz new file mode 100644 index 00000000000..810182faf90 Binary files /dev/null and b/tests/ut/data/dataset/golden/tfrecord_files_basic.npz differ diff --git a/tests/ut/data/dataset/golden/tfrecord_no_schema.npz b/tests/ut/data/dataset/golden/tfrecord_no_schema.npz new file mode 100644 index 00000000000..bda2807e895 Binary files /dev/null and b/tests/ut/data/dataset/golden/tfrecord_no_schema.npz differ diff --git a/tests/ut/data/dataset/golden/tfrecord_pad_bytes10.npz b/tests/ut/data/dataset/golden/tfrecord_pad_bytes10.npz new file mode 100644 index 00000000000..580e19de64b Binary files /dev/null and b/tests/ut/data/dataset/golden/tfrecord_pad_bytes10.npz differ diff --git a/tests/ut/data/dataset/imagefolder/ExpectedBoundingBoxAugmentOp0.jpg b/tests/ut/data/dataset/imagefolder/ExpectedBoundingBoxAugmentOp0.jpg new file mode 100644 index 00000000000..242559f2763 Binary files /dev/null and b/tests/ut/data/dataset/imagefolder/ExpectedBoundingBoxAugmentOp0.jpg differ diff --git a/tests/ut/data/dataset/imagefolder/ExpectedRandomCropWithBBox_C0.jpg b/tests/ut/data/dataset/imagefolder/ExpectedRandomCropWithBBox_C0.jpg new file mode 100644 index 00000000000..362d841170b Binary files /dev/null and b/tests/ut/data/dataset/imagefolder/ExpectedRandomCropWithBBox_C0.jpg differ diff --git 
a/tests/ut/data/dataset/imagefolder/ExpectedRandomHorizontalFlipWithBBox0.jpg b/tests/ut/data/dataset/imagefolder/ExpectedRandomHorizontalFlipWithBBox0.jpg new file mode 100644 index 00000000000..3210a7b1feb Binary files /dev/null and b/tests/ut/data/dataset/imagefolder/ExpectedRandomHorizontalFlipWithBBox0.jpg differ diff --git a/tests/ut/data/dataset/imagefolder/ExpectedRandomResizeWithBBox_C0.jpg b/tests/ut/data/dataset/imagefolder/ExpectedRandomResizeWithBBox_C0.jpg new file mode 100644 index 00000000000..235516d75f4 Binary files /dev/null and b/tests/ut/data/dataset/imagefolder/ExpectedRandomResizeWithBBox_C0.jpg differ diff --git a/tests/ut/data/dataset/imagefolder/ExpectedRandomResizedCropWithBBox_C0.jpg b/tests/ut/data/dataset/imagefolder/ExpectedRandomResizedCropWithBBox_C0.jpg new file mode 100644 index 00000000000..d7666adb9bc Binary files /dev/null and b/tests/ut/data/dataset/imagefolder/ExpectedRandomResizedCropWithBBox_C0.jpg differ diff --git a/tests/ut/data/dataset/imagefolder/ExpectedRandomVerticalFlipWithBBox_C0.jpg b/tests/ut/data/dataset/imagefolder/ExpectedRandomVerticalFlipWithBBox_C0.jpg new file mode 100644 index 00000000000..c5fe8ff5402 Binary files /dev/null and b/tests/ut/data/dataset/imagefolder/ExpectedRandomVerticalFlipWithBBox_C0.jpg differ diff --git a/tests/ut/data/dataset/imagefolder/ExpectedResizeWithBBox_C0.jpg b/tests/ut/data/dataset/imagefolder/ExpectedResizeWithBBox_C0.jpg new file mode 100644 index 00000000000..f6dfd85547d Binary files /dev/null and b/tests/ut/data/dataset/imagefolder/ExpectedResizeWithBBox_C0.jpg differ diff --git a/tests/ut/data/dataset/testCifar100Data/datasetSchema.json b/tests/ut/data/dataset/testCifar100Data/datasetSchema.json deleted file mode 100644 index 474a806bf26..00000000000 --- a/tests/ut/data/dataset/testCifar100Data/datasetSchema.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "datasetType": "CIFAR100", - "numRows": 100, - "columns": { - "image": { - "type": "uint8", - "rank": 1, - "t_impl": "cvmat" 
- }, - "coarse_label" : { - "type": "uint32", - "rank": 1, - "t_impl": "flex" - }, - "fine_label" : { - "type": "uint32", - "rank": 1, - "t_impl": "flex" - } - } -} diff --git a/tests/ut/data/dataset/testCifar100Data/datasetSchemaTestRepeat.json b/tests/ut/data/dataset/testCifar100Data/datasetSchemaTestRepeat.json deleted file mode 100644 index a90edb342b0..00000000000 --- a/tests/ut/data/dataset/testCifar100Data/datasetSchemaTestRepeat.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "datasetType": "CIFAR100", - "numRows": 33, - "columns": { - "image": { - "type": "uint8", - "rank": 1, - "t_impl": "cvmat" - }, - "coarse_label" : { - "type": "uint32", - "rank": 1, - "t_impl": "flex" - }, - "fine_label" : { - "type": "uint32", - "rank": 1, - "t_impl": "flex" - } - } -} diff --git a/tests/ut/data/dataset/testCifar10Data/data_batch_1.bin b/tests/ut/data/dataset/testCifar10Data/data_batch_1.bin index 7964f0952cd..b3ec462f799 100644 Binary files a/tests/ut/data/dataset/testCifar10Data/data_batch_1.bin and b/tests/ut/data/dataset/testCifar10Data/data_batch_1.bin differ diff --git a/tests/ut/data/dataset/testCifar10Data/datasetDistributionAll.json b/tests/ut/data/dataset/testCifar10Data/datasetDistributionAll.json deleted file mode 100644 index 9234a6e0330..00000000000 --- a/tests/ut/data/dataset/testCifar10Data/datasetDistributionAll.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "deviceNum" : 3, - "deviceId" : 1, - "shardConfig" : "ALL", - "shuffle" : "ON", - "seed" : 0, - "epoch" : 2 -} - diff --git a/tests/ut/data/dataset/testCifar10Data/datasetDistributionRandom.json b/tests/ut/data/dataset/testCifar10Data/datasetDistributionRandom.json deleted file mode 100644 index 3f61c582a5a..00000000000 --- a/tests/ut/data/dataset/testCifar10Data/datasetDistributionRandom.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "deviceNum" : 3, - "deviceId" : 1, - "shardConfig" : "RANDOM", - "shuffle" : "ON", - "seed" : 0, - "epoch" : 1 -} - diff --git 
a/tests/ut/data/dataset/testCifar10Data/datasetDistributionUnique.json b/tests/ut/data/dataset/testCifar10Data/datasetDistributionUnique.json deleted file mode 100644 index 99e685132b9..00000000000 --- a/tests/ut/data/dataset/testCifar10Data/datasetDistributionUnique.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "deviceNum" : 3, - "deviceId" : 1, - "shardConfig" : "UNIQUE", - "shuffle" : "ON", - "seed" : 0, - "epoch" : 3 -} - diff --git a/tests/ut/data/dataset/testCifar10Data/datasetSchema.json b/tests/ut/data/dataset/testCifar10Data/datasetSchema.json deleted file mode 100644 index 1a04b9af597..00000000000 --- a/tests/ut/data/dataset/testCifar10Data/datasetSchema.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "datasetType": "CIFAR10", - "numRows": 60000, - "columns": { - "image": { - "type": "uint8", - "rank": 1, - "t_impl": "cvmat" - }, - "label" : { - "type": "uint32", - "rank": 1, - "t_impl": "flex" - } - } -} diff --git a/tests/ut/data/dataset/testCifar10Data/datasetSchemaTestRepeat.json b/tests/ut/data/dataset/testCifar10Data/datasetSchemaTestRepeat.json deleted file mode 100644 index c25e11c30f8..00000000000 --- a/tests/ut/data/dataset/testCifar10Data/datasetSchemaTestRepeat.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "datasetType": "CIFAR10", - "numRows": 33, - "columns": { - "image": { - "type": "uint8", - "rank": 1, - "t_impl": "cvmat" - }, - "label" : { - "type": "uint32", - "rank": 1, - "t_impl": "flex" - } - } -} diff --git a/tests/ut/data/dataset/test_tf_file_3_images_1/datasetSchema.json b/tests/ut/data/dataset/test_tf_file_3_images_1/datasetSchema.json deleted file mode 100644 index 0aa5a4577a2..00000000000 --- a/tests/ut/data/dataset/test_tf_file_3_images_1/datasetSchema.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "datasetType": "TF", - "numRows": 3, - "columns": { - "label": { - "type": "int64", - "rank": 1, - "t_impl": "flex" - } - } -} diff --git a/tests/ut/data/dataset/test_tf_file_3_images_1/train-0000-of-0001.data 
b/tests/ut/data/dataset/test_tf_file_3_images_1/train-0000-of-0001.data deleted file mode 100644 index 829e8d70cb9..00000000000 Binary files a/tests/ut/data/dataset/test_tf_file_3_images_1/train-0000-of-0001.data and /dev/null differ diff --git a/tests/ut/data/dataset/test_tf_file_3_images_2/datasetSchema.json b/tests/ut/data/dataset/test_tf_file_3_images_2/datasetSchema.json deleted file mode 100644 index b7b3cb9ea32..00000000000 --- a/tests/ut/data/dataset/test_tf_file_3_images_2/datasetSchema.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "datasetType": "TF", - "numRows": 3, - "columns": { - "image": { - "type": "uint8", - "rank": 1, - "t_impl": "cvmat" - } - } -} diff --git a/tests/ut/data/dataset/test_tf_file_3_images_2/train-0000-of-0001.data b/tests/ut/data/dataset/test_tf_file_3_images_2/train-0000-of-0001.data deleted file mode 100644 index 829e8d70cb9..00000000000 Binary files a/tests/ut/data/dataset/test_tf_file_3_images_2/train-0000-of-0001.data and /dev/null differ diff --git a/tests/ut/data/mindrecord/testGraphData/testdata b/tests/ut/data/mindrecord/testGraphData/testdata index e206469ac69..52359734692 100644 Binary files a/tests/ut/data/mindrecord/testGraphData/testdata and b/tests/ut/data/mindrecord/testGraphData/testdata differ diff --git a/tests/ut/data/mindrecord/testGraphData/testdata.db b/tests/ut/data/mindrecord/testGraphData/testdata.db index 541da0e998e..0f022589f4c 100644 Binary files a/tests/ut/data/mindrecord/testGraphData/testdata.db and b/tests/ut/data/mindrecord/testGraphData/testdata.db differ diff --git a/tests/ut/python/automl/case.py b/tests/ut/python/automl/case.py new file mode 100644 index 00000000000..745376277c4 --- /dev/null +++ b/tests/ut/python/automl/case.py @@ -0,0 +1,41 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Test case.""" +import numpy as np + +import mindspore +import mindspore.nn as nn +from mindspore import Tensor, context + + +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(1, 3, 3) + self.conv2 = nn.Conv2d(1, 3, 5, has_bias=True) + self.layers = (self.conv1, self.conv2) + + def construct(self, x, index): + x = self.layers[index](x) + y = self.conv1(x) + return x + y + + +def test_case(): + context.set_context(mode=context.GRAPH_MODE, save_graphs=True) + net = Net() + data = Tensor(np.ones((1, 1, 224, 224)), mindspore.float32) + idx = Tensor(1, mindspore.int32) + net(data, idx) diff --git a/tests/ut/python/dataset/test_basic_tokenizer.py b/tests/ut/python/dataset/test_basic_tokenizer.py deleted file mode 100644 index 45c9f94da49..00000000000 --- a/tests/ut/python/dataset/test_basic_tokenizer.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -""" -Testing BasicTokenizer op in DE -""" -import numpy as np -import mindspore.dataset as ds -from mindspore import log as logger -import mindspore.dataset.text as nlp - -BASIC_TOKENIZER_FILE = "../data/dataset/testTokenizerData/basic_tokenizer.txt" - -test_paras = [ - dict( - first=1, - last=6, - expected_tokens= - [['Welcome', 'to', 'Beijing', '北', '京', '欢', '迎', '您'], - ['長', '風', '破', '浪', '會', '有', '時', ',', '直', '掛', '雲', '帆', '濟', '滄', '海'], - ['😀', '嘿', '嘿', '😃', '哈', '哈', '😄', '大', '笑', '😁', '嘻', '嘻'], - ['明', '朝', '(', '1368', '—', '1644', '年', ')', '和', '清', '朝', - '(', '1644', '—', '1911', '年', ')', ',', '是', '中', '国', '封', - '建', '王', '朝', '史', '上', '最', '后', '两', '个', '朝', '代'], - ['明', '代', '(', '1368', '-', '1644', ')', 'と', '清', '代', - '(', '1644', '-', '1911', ')', 'は', '、', '中', '国', 'の', '封', - '建', '王', '朝', 'の', '歴', '史', 'における', '最', '後', 'の2つの', '王', '朝', 'でした'], - ['명나라', '(', '1368', '-', '1644', ')', '와', '청나라', '(', '1644', '-', '1911', ')', '는', - '중국', '봉건', '왕조의', '역사에서', '마지막', '두', '왕조였다']] - ), - dict( - first=7, - last=7, - expected_tokens=[['this', 'is', 'a', 'funky', 'string']], - lower_case=True - ), -] - - -def check_basic_tokenizer(first, last, expected_tokens, lower_case=False, keep_whitespace=False, - normalization_form=nlp.utils.NormalizeForm.NONE, preserve_unused_token=False): - dataset = ds.TextFileDataset(BASIC_TOKENIZER_FILE, shuffle=False) - if first > 1: - dataset = dataset.skip(first - 1) - if last >= first: - dataset = dataset.take(last - first + 1) - - basic_tokenizer = nlp.BasicTokenizer(lower_case=lower_case, - keep_whitespace=keep_whitespace, - normalization_form=normalization_form, - preserve_unused_token=preserve_unused_token) - - dataset = dataset.map(operations=basic_tokenizer) - count = 0 - for i in dataset.create_dict_iterator(): - text = nlp.to_str(i['text']) - logger.info("Out:", text) - logger.info("Exp:", 
expected_tokens[count]) - np.testing.assert_array_equal(text, expected_tokens[count]) - count = count + 1 - - -def test_basic_tokenizer(): - """ - Test BasicTokenizer - """ - for paras in test_paras: - check_basic_tokenizer(**paras) - - -if __name__ == '__main__': - test_basic_tokenizer() diff --git a/tests/ut/python/dataset/test_bounding_box_augment.py b/tests/ut/python/dataset/test_bounding_box_augment.py index fbcb56514f2..8924af968c1 100644 --- a/tests/ut/python/dataset/test_bounding_box_augment.py +++ b/tests/ut/python/dataset/test_bounding_box_augment.py @@ -15,36 +15,21 @@ """ Testing the bounding box augment op in DE """ -from util import visualize_with_bounding_boxes, InvalidBBoxType, check_bad_bbox, \ - config_get_set_seed, config_get_set_num_parallel_workers, save_and_check_md5 + import numpy as np import mindspore.log as logger import mindspore.dataset as ds import mindspore.dataset.transforms.vision.c_transforms as c_vision +from util import visualize_with_bounding_boxes, InvalidBBoxType, check_bad_bbox, \ + config_get_set_seed, config_get_set_num_parallel_workers, save_and_check_md5 + GENERATE_GOLDEN = False +# updated VOC dataset with correct annotations DATA_DIR = "../data/dataset/testVOC2012_2" - - -def fix_annotate(bboxes): - """ - Fix annotations to format followed by mindspore. 
- :param bboxes: in [label, x_min, y_min, w, h, truncate, difficult] format - :return: annotation in [x_min, y_min, w, h, label, truncate, difficult] format - """ - for bbox in bboxes: - if bbox.size == 7: - tmp = bbox[0] - bbox[0] = bbox[1] - bbox[1] = bbox[2] - bbox[2] = bbox[3] - bbox[3] = bbox[4] - bbox[4] = tmp - else: - print("ERROR: Invalid Bounding Box size provided") - break - return bboxes +DATA_DIR_2 = ["../data/dataset/testCOCO/train/", + "../data/dataset/testCOCO/annotations/train.json"] # DATA_DIR, ANNOTATION_DIR def test_bounding_box_augment_with_rotation_op(plot_vis=False): @@ -63,13 +48,6 @@ def test_bounding_box_augment_with_rotation_op(plot_vis=False): # Ratio is set to 1 to apply rotation on all bounding boxes. test_op = c_vision.BoundingBoxAugment(c_vision.RandomRotation(90), 1) - # maps to fix annotations to minddata standard - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -100,22 +78,15 @@ def test_bounding_box_augment_with_crop_op(plot_vis=False): """ logger.info("test_bounding_box_augment_with_crop_op") - original_seed = config_get_set_seed(1) + original_seed = config_get_set_seed(0) original_num_parallel_workers = config_get_set_num_parallel_workers(1) dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) - # Ratio is set to 1 to apply rotation on all bounding boxes. - test_op = c_vision.BoundingBoxAugment(c_vision.RandomCrop(90), 1) + # Ratio is set to 0.9 to apply RandomCrop of size (50, 50) on 90% of the bounding boxes. 
+ test_op = c_vision.BoundingBoxAugment(c_vision.RandomCrop(50), 0.9) - # maps to fix annotations to minddata standard - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -154,13 +125,6 @@ def test_bounding_box_augment_valid_ratio_c(plot_vis=False): test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 0.9) - # maps to fix annotations to minddata standard - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -183,6 +147,36 @@ def test_bounding_box_augment_valid_ratio_c(plot_vis=False): ds.config.set_num_parallel_workers(original_num_parallel_workers) +def test_bounding_box_augment_op_coco_c(plot_vis=False): + """ + Prints images and bboxes side by side with and without BoundingBoxAugment Op applied, + Testing with COCO dataset + """ + logger.info("test_bounding_box_augment_op_coco_c") + + dataCoco1 = ds.CocoDataset(DATA_DIR_2[0], annotation_file=DATA_DIR_2[1], task="Detection", + decode=True, shuffle=False) + + dataCoco2 = ds.CocoDataset(DATA_DIR_2[0], annotation_file=DATA_DIR_2[1], task="Detection", + decode=True, shuffle=False) + + test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1) + + dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + columns_order=["image", "bbox"], + operations=[test_op]) + + unaugSamp, augSamp = [], [] + + for unAug, Aug in 
zip(dataCoco1.create_dict_iterator(), dataCoco2.create_dict_iterator()): + unaugSamp.append(unAug) + augSamp.append(Aug) + + if plot_vis: + visualize_with_bounding_boxes(unaugSamp, augSamp, "bbox") + + def test_bounding_box_augment_valid_edge_c(plot_vis=False): """ Test BoundingBoxAugment op (testing with valid edge case, box covering full image). @@ -198,25 +192,18 @@ def test_bounding_box_augment_valid_edge_c(plot_vis=False): test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1) - # maps to fix annotations to minddata standard - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops # Add column for "annotation" dataVoc1 = dataVoc1.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], operations=lambda img, bbox: - (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.uint32))) + (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32))) dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], operations=lambda img, bbox: - (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.uint32))) + (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32))) dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], @@ -249,10 +236,6 @@ def test_bounding_box_augment_invalid_ratio_c(): try: # ratio range is from 0 - 1 test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1.5) - # maps to fix annotations to minddata standard - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - 
operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -260,7 +243,7 @@ def test_bounding_box_augment_invalid_ratio_c(): operations=[test_op]) # Add column for "annotation" except ValueError as error: logger.info("Got an exception in DE: {}".format(str(error))) - assert "Input is not" in str(error) + assert "Input ratio is not within the required interval of (0.0 to 1.0)." in str(error) def test_bounding_box_augment_invalid_bounds_c(): @@ -286,6 +269,7 @@ if __name__ == "__main__": # set to false to not show plots test_bounding_box_augment_with_rotation_op(plot_vis=False) test_bounding_box_augment_with_crop_op(plot_vis=False) + test_bounding_box_augment_op_coco_c(plot_vis=False) test_bounding_box_augment_valid_ratio_c(plot_vis=False) test_bounding_box_augment_valid_edge_c(plot_vis=False) test_bounding_box_augment_invalid_ratio_c() diff --git a/tests/ut/python/dataset/test_bucket_batch_by_length.py b/tests/ut/python/dataset/test_bucket_batch_by_length.py index febcc6483f7..405b8741103 100644 --- a/tests/ut/python/dataset/test_bucket_batch_by_length.py +++ b/tests/ut/python/dataset/test_bucket_batch_by_length.py @@ -17,6 +17,7 @@ import pytest import numpy as np import mindspore.dataset as ds + # generates 1 column [0], [0, 1], ..., [0, ..., n-1] def generate_sequential(n): for i in range(n): @@ -44,6 +45,7 @@ def test_bucket_batch_invalid_input(): bucket_boundaries = [1, 2, 3] empty_bucket_boundaries = [] invalid_bucket_boundaries = ["1", "2", "3"] + zero_start_bucket_boundaries = [0, 2, 3] negative_bucket_boundaries = [1, 2, -3] decreasing_bucket_boundaries = [3, 2, 1] non_increasing_bucket_boundaries = [1, 2, 2] @@ -58,7 +60,7 @@ def test_bucket_batch_invalid_input(): with pytest.raises(TypeError) as info: _ = dataset.bucket_batch_by_length(invalid_column_names, bucket_boundaries, bucket_batch_sizes) - assert "column_names should be a list of str" in str(info.value) + 
assert "Argument column_names[0] with value 1 is not of type (,)." in str(info.value) with pytest.raises(ValueError) as info: _ = dataset.bucket_batch_by_length(column_names, empty_bucket_boundaries, bucket_batch_sizes) @@ -68,9 +70,13 @@ def test_bucket_batch_invalid_input(): _ = dataset.bucket_batch_by_length(column_names, invalid_bucket_boundaries, bucket_batch_sizes) assert "bucket_boundaries should be a list of int" in str(info.value) + with pytest.raises(ValueError) as info: + _ = dataset.bucket_batch_by_length(column_names, zero_start_bucket_boundaries, bucket_batch_sizes) + assert "bucket_boundaries must only contain positive numbers." in str(info.value) + with pytest.raises(ValueError) as info: _ = dataset.bucket_batch_by_length(column_names, negative_bucket_boundaries, bucket_batch_sizes) - assert "bucket_boundaries cannot contain any negative numbers" in str(info.value) + assert "bucket_boundaries must only contain positive numbers." in str(info.value) with pytest.raises(ValueError) as info: _ = dataset.bucket_batch_by_length(column_names, decreasing_bucket_boundaries, bucket_batch_sizes) @@ -99,12 +105,12 @@ def test_bucket_batch_invalid_input(): with pytest.raises(TypeError) as info: _ = dataset.bucket_batch_by_length(column_names, bucket_boundaries, bucket_batch_sizes, None, None, invalid_type_pad_to_bucket_boundary) - assert "Wrong input type for pad_to_bucket_boundary, should be " in str(info.value) + assert "Argument pad_to_bucket_boundary with value \"\" is not of type (,)." in str(info.value) with pytest.raises(TypeError) as info: _ = dataset.bucket_batch_by_length(column_names, bucket_boundaries, bucket_batch_sizes, None, None, False, invalid_type_drop_remainder) - assert "Wrong input type for drop_remainder, should be " in str(info.value) + assert "Argument drop_remainder with value \"\" is not of type (,)." 
in str(info.value) def test_bucket_batch_multi_bucket_no_padding(): @@ -272,7 +278,6 @@ def test_bucket_batch_default_pad(): [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]]] - output = [] for data in dataset.create_dict_iterator(): output.append(data["col1"].tolist()) diff --git a/tests/ut/python/dataset/test_cache_map.py b/tests/ut/python/dataset/test_cache_map.py new file mode 100644 index 00000000000..0e42b422aa2 --- /dev/null +++ b/tests/ut/python/dataset/test_cache_map.py @@ -0,0 +1,157 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +""" +Testing cache operator with mappable datasets +""" +import mindspore.dataset as ds +import mindspore.dataset.transforms.vision.c_transforms as c_vision +from mindspore import log as logger +from util import save_and_check_md5 + +DATA_DIR = "../data/dataset/testImageNetData/train/" + +GENERATE_GOLDEN = False + +def test_cache_map_basic1(): + """ + Test mappable leaf with cache op right over the leaf + + Repeat + | + Map(decode) + | + Cache + | + ImageFolder + """ + + logger.info("Test cache map basic 1") + + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + + # This DATA_DIR only has 2 images in it + ds1 = ds.ImageFolderDatasetV2(dataset_dir=DATA_DIR, cache=some_cache) + decode_op = c_vision.Decode() + ds1 = ds1.map(input_columns=["image"], operations=decode_op) + ds1 = ds1.repeat(4) + + filename = "cache_map_01_result.npz" + save_and_check_md5(ds1, filename, generate_golden=GENERATE_GOLDEN) + + logger.info("test_cache_map_basic1 Ended.\n") + + +def test_cache_map_basic2(): + """ + Test mappable leaf with the cache op later in the tree above the map(decode) + + Repeat + | + Cache + | + Map(decode) + | + ImageFolder + """ + + logger.info("Test cache map basic 2") + + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + + # This DATA_DIR only has 2 images in it + ds1 = ds.ImageFolderDatasetV2(dataset_dir=DATA_DIR) + decode_op = c_vision.Decode() + ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) + ds1 = ds1.repeat(4) + + filename = "cache_map_02_result.npz" + save_and_check_md5(ds1, filename, generate_golden=GENERATE_GOLDEN) + + logger.info("test_cache_map_basic2 Ended.\n") + + +def test_cache_map_basic3(): + """ + Test a repeat under mappable cache + + Cache + | + Map(decode) + | + Repeat + | + ImageFolder + """ + + logger.info("Test cache basic 3") + + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) 
+ + # This DATA_DIR only has 2 images in it + ds1 = ds.ImageFolderDatasetV2(dataset_dir=DATA_DIR) + decode_op = c_vision.Decode() + ds1 = ds1.repeat(4) + ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) + + num_iter = 0 + for _ in ds1.create_dict_iterator(): + num_iter += 1 + + logger.info("Number of data in ds1: {} ".format(num_iter)) + assert num_iter == 8 + logger.info('test_cache_basic3 Ended.\n') + + +def test_cache_map_failure1(): + """ + Test nested cache (failure) + + Repeat + | + Cache + | + Map(decode) + | + Cache + | + ImageFolder + + """ + logger.info("Test cache failure 1") + + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + + # This DATA_DIR only has 2 images in it + ds1 = ds.ImageFolderDatasetV2(dataset_dir=DATA_DIR, cache=some_cache) + decode_op = c_vision.Decode() + ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) + ds1 = ds1.repeat(4) + + try: + num_iter = 0 + for _ in ds1.create_dict_iterator(): + num_iter += 1 + except RuntimeError as e: + logger.info("Got an exception in DE: {}".format(str(e))) + assert "Nested cache operations is not supported!" in str(e) + + assert num_iter == 0 + logger.info('test_cache_failure1 Ended.\n') + +if __name__ == '__main__': + test_cache_map_basic1() + test_cache_map_basic2() + test_cache_map_basic3() + test_cache_map_failure1() diff --git a/tests/ut/python/dataset/test_cache_nomap.py b/tests/ut/python/dataset/test_cache_nomap.py new file mode 100644 index 00000000000..39e00c0621c --- /dev/null +++ b/tests/ut/python/dataset/test_cache_nomap.py @@ -0,0 +1,429 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +Testing cache operator with non-mappable datasets +""" +import mindspore.common.dtype as mstype +import mindspore.dataset as ds +import mindspore.dataset.transforms.vision.c_transforms as c_vision +from mindspore import log as logger + +DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] +SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" + +GENERATE_GOLDEN = False + +def test_cache_nomap_basic1(): + """ + A random dataset (a non mappable dataset) with a cache over it just after the leaf + """ + + logger.info("Test cache nomap basic 1") + + schema = ds.Schema() + schema.add_column('image', de_type=mstype.uint8, + shape=[640, 480, 3]) # 921600 bytes (a bit less than 1 MB per image) + schema.add_column('label', de_type=mstype.uint8, shape=[1]) + + # create a cache. 
arbitrary session_id for now + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + + # User-created sampler here + ds1 = ds.RandomDataset(schema=schema, total_rows=10, num_parallel_workers=4, cache=some_cache) + ds1 = ds1.repeat(4) + + num_iter = 0 + for data in ds1.create_dict_iterator(): + logger.info("printing the label: {}".format(data["label"])) + num_iter += 1 + + logger.info("Number of data in ds1: {} ".format(num_iter)) + assert num_iter == 40 + logger.info("test_cache_nomap_basic1 Ended.\n") + + +def test_cache_nomap_basic2(): + """ + A random dataset (a non mappable dataset) with a cache over it just after the leaf + """ + + logger.info("Test cache nomap basic 2") + + schema = ds.Schema() + schema.add_column('image', de_type=mstype.uint8, + shape=[640, 480, 3]) # 921600 bytes (a bit less than 1 MB per image) + schema.add_column('label', de_type=mstype.uint8, shape=[1]) + + # create a cache. arbitrary session_id for now + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + + # sampler arg not given directly, however any of these args will auto-generate an appropriate sampler: + # num_samples, shuffle, num_shards, shard_id + # In this case, the presence of num_samples chooses a sampler. 
+ ds1 = ds.RandomDataset(schema=schema, total_rows=20, num_samples=20, num_parallel_workers=4, cache=some_cache) + ds1 = ds1.repeat(2) + + num_iter = 0 + for data in ds1.create_dict_iterator(): + logger.info("printing the label: {}".format(data["label"])) + num_iter += 1 + + logger.info("Number of data in ds1: {} ".format(num_iter)) + assert num_iter == 40 + logger.info("test_cache_nomap_basic2 Ended.\n") + + +def test_cache_nomap_basic3(): + """ + A TF reader dataset (a non mappable dataset) with a cache over it just after the leaf + + Repeat + | + Map(decode) + | + Cache + | + TFReader + """ + + logger.info("Test cache nomap basic 3") + + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False, cache=some_cache) + decode_op = c_vision.Decode() + ds1 = ds1.map(input_columns=["image"], operations=decode_op) + ds1 = ds1.repeat(4) + + num_iter = 0 + for _ in ds1.create_dict_iterator(): + num_iter += 1 + + logger.info("Number of data in ds1: {} ".format(num_iter)) + assert num_iter == 12 + logger.info("test_cache_nomap_basic3 Ended.\n") + + +def test_cache_nomap_basic4(): + """ + A TF reader dataset (a non mappable dataset) with a map decode and cache after it + Since a global shuffle is used for the tf reader, it will inject a shuffle op over the tf. + But, if there's a cache later, that shuffle becomes invalid and should be removed. + + Repeat + | + Cache + | + Map(decode) + | + TFReader + """ + + logger.info("Test cache nomap basic 4") + + # This dataset has 3 records in it only + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + # With shuffle not being set, TF defaults to a "global" shuffle when there is no cache + # in the picture. This causes a shuffle-injection over the TF. For clarify, this test will + # explicitly give the global option, even though it's the default in python. 
+ # But, when caching is added in the ascendent tree above TF, we do global shuffling + # through the sampler over the cache, not by the shuffle op. In that case, tree prepare + # will remove the shuffle op that got injected by the initial tree creation. + ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.GLOBAL) + decode_op = c_vision.Decode() + + ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) + ds1 = ds1.repeat(4) + + num_iter = 0 + for _ in ds1.create_dict_iterator(): + num_iter += 1 + + logger.info("Number of data in ds1: {} ".format(num_iter)) + assert num_iter == 12 + logger.info("test_cache_nomap_basic4 Ended.\n") + + +def test_cache_nomap_basic5(): + """ + A TF reader dataset (a non mappable dataset) with a cache over it just after the leaf + Same as test 3, but this one does not have shuffle arg, causing tf to default to global + shuffle which attempts to inject a shuffle operator. However, since there is a cache + we do not need global shuffle, so the shuffle will not be built. 
It ends up being + identical to test basic 3, however we arrive at the same tree in different codepaths + (if there was no cache, then the shuffle IS built) + + Repeat + | + Map(decode) + | + Cache + | + TFReader + """ + + logger.info("Test cache nomap basic 5") + + # This dataset has 3 records in it only + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], cache=some_cache) + decode_op = c_vision.Decode() + ds1 = ds1.map(input_columns=["image"], operations=decode_op) + ds1 = ds1.repeat(4) + + num_iter = 0 + for _ in ds1.create_dict_iterator(): + num_iter += 1 + + logger.info("Number of data in ds1: {} ".format(num_iter)) + assert num_iter == 12 + logger.info("test_cache_nomap_basic5 Ended.\n") + + +def test_cache_nomap_basic6(): + """ + A TF reader dataset (a non mappable dataset) with a cache over it just after the leaf + In this one, the tf dataset will be given sharding configuration, however since a cache is + used, the tree prepare should undo the sharding configuration and instead, a distributed + sampler will be chosen with the same shard config. + + Repeat + | + Map(decode) + | + Cache + | + TFReader + """ + + logger.info("Test cache nomap basic 6") + + # This dataset has 3 records in it only + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + + # With only 3 records shard into 3, we expect only 1 record returned for this shard + # However, the sharding will be done by the sampler, not by the tf record leaf node + # In this case, it is a row-based sharding, not the file-based sharding that would happen if + # there was not any cache. 
+ ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_shards=3, shard_id=1, cache=some_cache) + decode_op = c_vision.Decode() + ds1 = ds1.map(input_columns=["image"], operations=decode_op) + ds1 = ds1.repeat(4) + + num_iter = 0 + for _ in ds1.create_dict_iterator(): + num_iter += 1 + + logger.info("Number of data in ds1: {} ".format(num_iter)) + assert num_iter == 4 + logger.info("test_cache_nomap_basic6 Ended.\n") + + +def test_cache_nomap_basic7(): + """ + A TF reader dataset (a non mappable dataset) that uses global shuffle, and is cached followed by + map. + In this one, the tf dataset with global shuffle might want to inject a shuffle op over top of the + tf reader, but since a cache is given, it will choose not to. + + Repeat + | + Map(decode) + | + cache + | + TFReader + """ + + logger.info("Test cache nomap basic 7") + + # This dataset has 3 records in it only + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + + ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.GLOBAL, cache=some_cache) + decode_op = c_vision.Decode() + ds1 = ds1.map(input_columns=["image"], operations=decode_op) + ds1 = ds1.repeat(4) + + num_iter = 0 + for _ in ds1.create_dict_iterator(): + num_iter += 1 + + logger.info("Number of data in ds1: {} ".format(num_iter)) + assert num_iter == 12 + logger.info("test_cache_nomap_basic7 Ended.\n") + + +def test_cache_nomap_allowed_share1(): + """ + It is allowed to share the cache between the following two trees: + + Repeat Shuffle + | | + Cache Cache + | | + TFReader TFReader + """ + + logger.info("Test cache nomap allowed share 1") + + ds.config.set_seed(1) + # This dataset has 3 records in it only + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False, cache=some_cache) + ds1 = ds1.repeat(4) + + ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], 
shuffle=False, cache=some_cache) + ds2 = ds2.shuffle(buffer_size=2) + + num_iter = 0 + for _ in ds1.create_dict_iterator(): + num_iter += 1 + assert num_iter == 12 + logger.info("Number of data in ds1: {} ".format(num_iter)) + + num_iter = 0 + for _ in ds2.create_dict_iterator(): + num_iter += 1 + assert num_iter == 3 + logger.info("test_cache_nomap_allowed_share1 Ended.\n") + + +def test_cache_nomap_allowed_share2(): + """ + It is allowed to share the cache between the following two trees (with map decode): + + Repeat Shuffle + | | + Cache Cache + | | + Map(decode) Map(decode) + | | + TFReader TFReader + """ + + logger.info("Test cache nomap allowed share 2") + + ds.config.set_seed(1) + # This dataset has 3 records in it only + some_cache = ds.DatasetCache(session_id=2, size=0, spilling=True) + decode_op = c_vision.Decode() + + ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) + ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) + ds1 = ds1.repeat(4) + + ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) + ds2 = ds2.map(input_columns=["image"], operations=decode_op, cache=some_cache) + ds2 = ds2.shuffle(buffer_size=2) + + num_iter = 0 + for _ in ds1.create_dict_iterator(): + num_iter += 1 + logger.info("Number of data in ds1: {} ".format(num_iter)) + assert num_iter == 12 + + num_iter = 0 + for _ in ds2.create_dict_iterator(): + num_iter += 1 + assert num_iter == 3 + logger.info("test_cache_nomap_allowed_share2 Ended.\n") + + +def test_cache_nomap_allowed_share3(): + """ + It is allowed to share the cache between the following two trees (different shard ids): + + Repeat Repeat + | | + Cache Cache + | | + TFReader(shard_id = 0) TFReader(shard_id = 1) + """ + + logger.info("Test cache nomap allowed share 3") + + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + + tf_files = ["../data/dataset/tf_file_dataset/test1.data", 
"../data/dataset/tf_file_dataset/test2.data"] + ds1 = ds.TFRecordDataset(tf_files, num_shards=2, shard_id=0, num_samples=3, shuffle=False, cache=some_cache) + ds1 = ds1.repeat(4) + + ds2 = ds.TFRecordDataset(tf_files, num_shards=2, shard_id=1, num_samples=3, shuffle=False, cache=some_cache) + ds2 = ds2.repeat(4) + + num_iter = 0 + for _ in ds1.create_dict_iterator(): + num_iter += 1 + logger.info("Number of data in ds1: {} ".format(num_iter)) + assert num_iter == 12 + + num_iter = 0 + for _ in ds2.create_dict_iterator(): + num_iter += 1 + assert num_iter == 12 + logger.info("test_cache_nomap_allowed_share3 Ended.\n") + + +def test_cache_nomap_disallowed_share1(): + """ + It is not allowed to share the cache between the following two trees: + + Cache Cache + | | + Map(decode) Map(rescale) + | | + TFReader TFReader + """ + + logger.info("Test cache nomap disallowed share1") + + # This dataset has 3 records in it only + some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) + decode_op = c_vision.Decode() + rescale_op = c_vision.Rescale(1.0 / 255.0, -1.0) + + ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) + ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) + + ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) + ds2 = ds2.map(input_columns=["image"], operations=rescale_op, cache=some_cache) + + num_iter = 0 + for _ in ds1.create_dict_iterator(): + num_iter += 1 + logger.info("Number of data in ds1: {} ".format(num_iter)) + assert num_iter == 3 + + try: + sum([1 for _ in ds2]) + except RuntimeError as e: + logger.info("Got an exception in DE: {}".format(str(e))) + assert "Attempt to re-use a cache for a different tree!" 
in str(e) + + logger.info("test_cache_nomap_disallowed_share1 Ended.\n") + + +if __name__ == '__main__': + test_cache_nomap_basic1() + test_cache_nomap_basic2() + test_cache_nomap_basic3() + test_cache_nomap_basic4() + test_cache_nomap_basic5() + test_cache_nomap_basic6() + test_cache_nomap_basic7() + test_cache_nomap_allowed_share1() + test_cache_nomap_allowed_share2() + test_cache_nomap_allowed_share3() + test_cache_nomap_disallowed_share1() diff --git a/tests/ut/python/dataset/test_cifarop.py b/tests/ut/python/dataset/test_cifarop.py deleted file mode 100644 index e944f8703d7..00000000000 --- a/tests/ut/python/dataset/test_cifarop.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -import os - -import numpy as np - -import mindspore.dataset as ds -from mindspore import log as logger - -# Data for CIFAR and MNIST are not part of build tree -# They need to be downloaded directly -# prep_data.py can be executed or code below -# import sys -# sys.path.insert(0,"../../data") -# import prep_data -# prep_data.download_all_for_test("../../data") -DATA_DIR_10 = "../data/dataset/testCifar10Data" -DATA_DIR_100 = "../data/dataset/testCifar100Data" - - -def load_cifar(path): - raw = np.empty(0, dtype=np.uint8) - for file_name in os.listdir(path): - if file_name.endswith(".bin"): - with open(os.path.join(path, file_name), mode='rb') as file: - raw = np.append(raw, np.fromfile(file, dtype=np.uint8), axis=0) - raw = raw.reshape(-1, 3073) - labels = raw[:, 0] - images = raw[:, 1:] - images = images.reshape(-1, 3, 32, 32) - images = images.transpose(0, 2, 3, 1) - return images, labels - - -def test_case_dataset_cifar10(): - """ - dataset parameter - """ - logger.info("Test dataset parameter") - # apply dataset operations - data1 = ds.Cifar10Dataset(DATA_DIR_10, 100) - - num_iter = 0 - for _ in data1.create_dict_iterator(): - # in this example, each dictionary has keys "image" and "label" - num_iter += 1 - assert num_iter == 100 - - -def test_case_dataset_cifar100(): - """ - dataset parameter - """ - logger.info("Test dataset parameter") - # apply dataset operations - data1 = ds.Cifar100Dataset(DATA_DIR_100, 100) - - num_iter = 0 - for _ in data1.create_dict_iterator(): - # in this example, each dictionary has keys "image" and "label" - num_iter += 1 - assert num_iter == 100 - - -def test_reading_cifar10(): - """ - Validate CIFAR10 image readings - """ - data1 = ds.Cifar10Dataset(DATA_DIR_10, 100, shuffle=False) - images, labels = load_cifar(DATA_DIR_10) - for i, d in enumerate(data1.create_dict_iterator()): - np.testing.assert_array_equal(d["image"], images[i]) - 
np.testing.assert_array_equal(d["label"], labels[i]) - - -if __name__ == '__main__': - test_case_dataset_cifar10() - test_case_dataset_cifar100() - test_reading_cifar10() diff --git a/tests/ut/python/dataset/test_concatenate_op.py b/tests/ut/python/dataset/test_concatenate_op.py index d04ff497242..f7a432e4716 100644 --- a/tests/ut/python/dataset/test_concatenate_op.py +++ b/tests/ut/python/dataset/test_concatenate_op.py @@ -108,7 +108,7 @@ def test_concatenate_op_type_mismatch(): with pytest.raises(RuntimeError) as error_info: for _ in data: pass - assert "Tensor types do not match" in repr(error_info.value) + assert "Tensor types do not match" in str(error_info.value) def test_concatenate_op_type_mismatch2(): @@ -123,7 +123,7 @@ def test_concatenate_op_type_mismatch2(): with pytest.raises(RuntimeError) as error_info: for _ in data: pass - assert "Tensor types do not match" in repr(error_info.value) + assert "Tensor types do not match" in str(error_info.value) def test_concatenate_op_incorrect_dim(): @@ -138,13 +138,13 @@ def test_concatenate_op_incorrect_dim(): with pytest.raises(RuntimeError) as error_info: for _ in data: pass - assert "Only 1D tensors supported" in repr(error_info.value) + assert "Only 1D tensors supported" in str(error_info.value) def test_concatenate_op_wrong_axis(): with pytest.raises(ValueError) as error_info: data_trans.Concatenate(2) - assert "only 1D concatenation supported." in repr(error_info.value) + assert "only 1D concatenation supported." 
in str(error_info.value) def test_concatenate_op_negative_axis(): @@ -163,18 +163,11 @@ def test_concatenate_op_negative_axis(): def test_concatenate_op_incorrect_input_dim(): - def gen(): - yield (np.array(["ss", "ad"], dtype='S'),) - prepend_tensor = np.array([["ss", "ad"], ["ss", "ad"]], dtype='S') - data = ds.GeneratorDataset(gen, column_names=["col"]) - concatenate_op = data_trans.Concatenate(0, prepend_tensor) - data = data.map(input_columns=["col"], operations=concatenate_op) - with pytest.raises(RuntimeError) as error_info: - for _ in data: - pass - assert "Only 1D tensors supported" in repr(error_info.value) + with pytest.raises(ValueError) as error_info: + data_trans.Concatenate(0, prepend_tensor) + assert "can only prepend 1D arrays." in str(error_info.value) if __name__ == "__main__": diff --git a/tests/ut/python/dataset/test_config.py b/tests/ut/python/dataset/test_config.py index 259f42d9481..6783eea2fdc 100644 --- a/tests/ut/python/dataset/test_config.py +++ b/tests/ut/python/dataset/test_config.py @@ -245,17 +245,17 @@ def test_deterministic_run_distribution(): # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - random_crop_op = c_vision.RandomHorizontalFlip(0.1) + random_horizontal_flip_op = c_vision.RandomHorizontalFlip(0.1) decode_op = c_vision.Decode() data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=random_crop_op) + data1 = data1.map(input_columns=["image"], operations=random_horizontal_flip_op) # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = data2.map(input_columns=["image"], operations=decode_op) # If seed is set up on constructor, so the two ops output deterministic sequence - random_crop_op2 = c_vision.RandomHorizontalFlip(0.1) - data2 = data2.map(input_columns=["image"], operations=random_crop_op2) + random_horizontal_flip_op2 = 
c_vision.RandomHorizontalFlip(0.1) + data2 = data2.map(input_columns=["image"], operations=random_horizontal_flip_op2) for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): np.testing.assert_equal(item1["image"], item2["image"]) diff --git a/tests/ut/python/dataset/test_dataset_numpy_slices.py b/tests/ut/python/dataset/test_dataset_numpy_slices.py index 4cd4e26a337..791a5674088 100644 --- a/tests/ut/python/dataset/test_dataset_numpy_slices.py +++ b/tests/ut/python/dataset/test_dataset_numpy_slices.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== +import sys +import pytest import numpy as np +import pandas as pd import mindspore.dataset as de from mindspore import log as logger import mindspore.dataset.transforms.vision.c_transforms as vision -import pandas as pd def test_numpy_slices_list_1(): @@ -172,8 +174,26 @@ def test_numpy_slices_distributed_sampler(): assert sum([1 for _ in ds]) == 2 -def test_numpy_slices_sequential_sampler(): +def test_numpy_slices_distributed_shard_limit(): + logger.info("Test Slicing a 1D list.") + np_data = [1, 2, 3] + num = sys.maxsize + with pytest.raises(ValueError) as err: + de.NumpySlicesDataset(np_data, num_shards=num, shard_id=0, shuffle=False) + assert "Input num_shards is not within the required interval of (1 to 2147483647)." in str(err.value) + + +def test_numpy_slices_distributed_zero_shard(): + logger.info("Test Slicing a 1D list.") + + np_data = [1, 2, 3] + with pytest.raises(ValueError) as err: + de.NumpySlicesDataset(np_data, num_shards=0, shard_id=0, shuffle=False) + assert "Input num_shards is not within the required interval of (1 to 2147483647)." 
in str(err.value) + + +def test_numpy_slices_sequential_sampler(): logger.info("Test numpy_slices_dataset with SequentialSampler and repeat.") np_data = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]] @@ -183,6 +203,42 @@ def test_numpy_slices_sequential_sampler(): assert np.equal(data[0], np_data[i % 8]).all() +def test_numpy_slices_invalid_column_names_type(): + logger.info("Test incorrect column_names input") + np_data = [1, 2, 3] + + with pytest.raises(TypeError) as err: + de.NumpySlicesDataset(np_data, column_names=[1], shuffle=False) + assert "Argument column_names[0] with value 1 is not of type (,)." in str(err.value) + + +def test_numpy_slices_invalid_column_names_string(): + logger.info("Test incorrect column_names input") + np_data = [1, 2, 3] + + with pytest.raises(ValueError) as err: + de.NumpySlicesDataset(np_data, column_names=[""], shuffle=False) + assert "column_names[0] should not be empty" in str(err.value) + + +def test_numpy_slices_invalid_empty_column_names(): + logger.info("Test incorrect column_names input") + np_data = [1, 2, 3] + + with pytest.raises(ValueError) as err: + de.NumpySlicesDataset(np_data, column_names=[], shuffle=False) + assert "column_names should not be empty" in str(err.value) + + +def test_numpy_slices_invalid_empty_data_column(): + logger.info("Test incorrect column_names input") + np_data = [] + + with pytest.raises(ValueError) as err: + de.NumpySlicesDataset(np_data, shuffle=False) + assert "Argument data cannot be empty" in str(err.value) + + if __name__ == "__main__": test_numpy_slices_list_1() test_numpy_slices_list_2() @@ -196,4 +252,10 @@ if __name__ == "__main__": test_numpy_slices_csv_dict() test_numpy_slices_num_samplers() test_numpy_slices_distributed_sampler() + test_numpy_slices_distributed_shard_limit() + test_numpy_slices_distributed_zero_shard() test_numpy_slices_sequential_sampler() + test_numpy_slices_invalid_column_names_type() + test_numpy_slices_invalid_column_names_string() + 
test_numpy_slices_invalid_empty_column_names() + test_numpy_slices_invalid_empty_data_column() diff --git a/tests/ut/python/dataset/test_datasets_cifarop.py b/tests/ut/python/dataset/test_datasets_cifarop.py new file mode 100644 index 00000000000..d6d3029b53b --- /dev/null +++ b/tests/ut/python/dataset/test_datasets_cifarop.py @@ -0,0 +1,387 @@ +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +Test Cifar10 and Cifar100 dataset operators +""" +import os +import pytest +import numpy as np +import matplotlib.pyplot as plt +import mindspore.dataset as ds +from mindspore import log as logger + +DATA_DIR_10 = "../data/dataset/testCifar10Data" +DATA_DIR_100 = "../data/dataset/testCifar100Data" + + +def load_cifar(path, kind="cifar10"): + """ + load Cifar10/100 data + """ + raw = np.empty(0, dtype=np.uint8) + for file_name in os.listdir(path): + if file_name.endswith(".bin"): + with open(os.path.join(path, file_name), mode='rb') as file: + raw = np.append(raw, np.fromfile(file, dtype=np.uint8), axis=0) + if kind == "cifar10": + raw = raw.reshape(-1, 3073) + labels = raw[:, 0] + images = raw[:, 1:] + elif kind == "cifar100": + raw = raw.reshape(-1, 3074) + labels = raw[:, :2] + images = raw[:, 2:] + else: + raise ValueError("Invalid parameter value") + images = images.reshape(-1, 3, 32, 32) + images = images.transpose(0, 2, 3, 1) + return images, 
labels + + +def visualize_dataset(images, labels): + """ + Helper function to visualize the dataset samples + """ + num_samples = len(images) + for i in range(num_samples): + plt.subplot(1, num_samples, i + 1) + plt.imshow(images[i]) + plt.title(labels[i]) + plt.show() + + +### Testcases for Cifar10Dataset Op ### + + +def test_cifar10_content_check(): + """ + Validate Cifar10Dataset image readings + """ + logger.info("Test Cifar10Dataset Op with content check") + data1 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=100, shuffle=False) + images, labels = load_cifar(DATA_DIR_10) + num_iter = 0 + # in this example, each dictionary has keys "image" and "label" + for i, d in enumerate(data1.create_dict_iterator()): + np.testing.assert_array_equal(d["image"], images[i]) + np.testing.assert_array_equal(d["label"], labels[i]) + num_iter += 1 + assert num_iter == 100 + + +def test_cifar10_basic(): + """ + Validate CIFAR10 + """ + logger.info("Test Cifar10Dataset Op") + + # case 1: test num_samples + data1 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=100) + num_iter1 = 0 + for _ in data1.create_dict_iterator(): + num_iter1 += 1 + assert num_iter1 == 100 + + # case 2: test num_parallel_workers + data2 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=50, num_parallel_workers=1) + num_iter2 = 0 + for _ in data2.create_dict_iterator(): + num_iter2 += 1 + assert num_iter2 == 50 + + # case 3: test repeat + data3 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=100) + data3 = data3.repeat(3) + num_iter3 = 0 + for _ in data3.create_dict_iterator(): + num_iter3 += 1 + assert num_iter3 == 300 + + # case 4: test batch with drop_remainder=False + data4 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=100) + assert data4.get_dataset_size() == 100 + assert data4.get_batch_size() == 1 + data4 = data4.batch(batch_size=7) # drop_remainder is default to be False + assert data4.get_dataset_size() == 15 + assert data4.get_batch_size() == 7 + num_iter4 = 0 + for _ in data4.create_dict_iterator(): + num_iter4 
+= 1 + assert num_iter4 == 15 + + # case 5: test batch with drop_remainder=True + data5 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=100) + assert data5.get_dataset_size() == 100 + assert data5.get_batch_size() == 1 + data5 = data5.batch(batch_size=7, drop_remainder=True) # the rest of incomplete batch will be dropped + assert data5.get_dataset_size() == 14 + assert data5.get_batch_size() == 7 + num_iter5 = 0 + for _ in data5.create_dict_iterator(): + num_iter5 += 1 + assert num_iter5 == 14 + + +def test_cifar10_pk_sampler(): + """ + Test Cifar10Dataset with PKSampler + """ + logger.info("Test Cifar10Dataset Op with PKSampler") + golden = [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, + 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9] + sampler = ds.PKSampler(3) + data = ds.Cifar10Dataset(DATA_DIR_10, sampler=sampler) + num_iter = 0 + label_list = [] + for item in data.create_dict_iterator(): + label_list.append(item["label"]) + num_iter += 1 + np.testing.assert_array_equal(golden, label_list) + assert num_iter == 30 + + +def test_cifar10_sequential_sampler(): + """ + Test Cifar10Dataset with SequentialSampler + """ + logger.info("Test Cifar10Dataset Op with SequentialSampler") + num_samples = 30 + sampler = ds.SequentialSampler(num_samples=num_samples) + data1 = ds.Cifar10Dataset(DATA_DIR_10, sampler=sampler) + data2 = ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, num_samples=num_samples) + num_iter = 0 + for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + np.testing.assert_equal(item1["label"], item2["label"]) + num_iter += 1 + assert num_iter == num_samples + + +def test_cifar10_exception(): + """ + Test error cases for Cifar10Dataset + """ + logger.info("Test error cases for Cifar10Dataset") + error_msg_1 = "sampler and shuffle cannot be specified at the same time" + with pytest.raises(RuntimeError, match=error_msg_1): + ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, sampler=ds.PKSampler(3)) + + error_msg_2 = "sampler and sharding 
cannot be specified at the same time" + with pytest.raises(RuntimeError, match=error_msg_2): + ds.Cifar10Dataset(DATA_DIR_10, sampler=ds.PKSampler(3), num_shards=2, shard_id=0) + + error_msg_3 = "num_shards is specified and currently requires shard_id as well" + with pytest.raises(RuntimeError, match=error_msg_3): + ds.Cifar10Dataset(DATA_DIR_10, num_shards=10) + + error_msg_4 = "shard_id is specified but num_shards is not" + with pytest.raises(RuntimeError, match=error_msg_4): + ds.Cifar10Dataset(DATA_DIR_10, shard_id=0) + + error_msg_5 = "Input shard_id is not within the required interval" + with pytest.raises(ValueError, match=error_msg_5): + ds.Cifar10Dataset(DATA_DIR_10, num_shards=2, shard_id=-1) + with pytest.raises(ValueError, match=error_msg_5): + ds.Cifar10Dataset(DATA_DIR_10, num_shards=2, shard_id=5) + + error_msg_6 = "num_parallel_workers exceeds" + with pytest.raises(ValueError, match=error_msg_6): + ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, num_parallel_workers=0) + with pytest.raises(ValueError, match=error_msg_6): + ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, num_parallel_workers=88) + + +def test_cifar10_visualize(plot=False): + """ + Visualize Cifar10Dataset results + """ + logger.info("Test Cifar10Dataset visualization") + + data1 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False) + num_iter = 0 + image_list, label_list = [], [] + for item in data1.create_dict_iterator(): + image = item["image"] + label = item["label"] + image_list.append(image) + label_list.append("label {}".format(label)) + assert isinstance(image, np.ndarray) + assert image.shape == (32, 32, 3) + assert image.dtype == np.uint8 + assert label.dtype == np.uint32 + num_iter += 1 + assert num_iter == 10 + if plot: + visualize_dataset(image_list, label_list) + + +### Testcases for Cifar100Dataset Op ### + +def test_cifar100_content_check(): + """ + Validate Cifar100Dataset image readings + """ + logger.info("Test Cifar100Dataset with content check") + data1 = 
ds.Cifar100Dataset(DATA_DIR_100, num_samples=100, shuffle=False) + images, labels = load_cifar(DATA_DIR_100, kind="cifar100") + num_iter = 0 + # in this example, each dictionary has keys "image", "coarse_label" and "fine_image" + for i, d in enumerate(data1.create_dict_iterator()): + np.testing.assert_array_equal(d["image"], images[i]) + np.testing.assert_array_equal(d["coarse_label"], labels[i][0]) + np.testing.assert_array_equal(d["fine_label"], labels[i][1]) + num_iter += 1 + assert num_iter == 100 + + +def test_cifar100_basic(): + """ + Test Cifar100Dataset + """ + logger.info("Test Cifar100Dataset") + + # case 1: test num_samples + data1 = ds.Cifar100Dataset(DATA_DIR_100, num_samples=100) + num_iter1 = 0 + for _ in data1.create_dict_iterator(): + num_iter1 += 1 + assert num_iter1 == 100 + + # case 2: test repeat + data1 = data1.repeat(2) + num_iter2 = 0 + for _ in data1.create_dict_iterator(): + num_iter2 += 1 + assert num_iter2 == 200 + + # case 3: test num_parallel_workers + data2 = ds.Cifar100Dataset(DATA_DIR_100, num_samples=100, num_parallel_workers=1) + num_iter3 = 0 + for _ in data2.create_dict_iterator(): + num_iter3 += 1 + assert num_iter3 == 100 + + # case 4: test batch with drop_remainder=False + data3 = ds.Cifar100Dataset(DATA_DIR_100, num_samples=100) + assert data3.get_dataset_size() == 100 + assert data3.get_batch_size() == 1 + data3 = data3.batch(batch_size=3) + assert data3.get_dataset_size() == 34 + assert data3.get_batch_size() == 3 + num_iter4 = 0 + for _ in data3.create_dict_iterator(): + num_iter4 += 1 + assert num_iter4 == 34 + + # case 4: test batch with drop_remainder=True + data4 = ds.Cifar100Dataset(DATA_DIR_100, num_samples=100) + data4 = data4.batch(batch_size=3, drop_remainder=True) + assert data4.get_dataset_size() == 33 + assert data4.get_batch_size() == 3 + num_iter5 = 0 + for _ in data4.create_dict_iterator(): + num_iter5 += 1 + assert num_iter5 == 33 + + +def test_cifar100_pk_sampler(): + """ + Test Cifar100Dataset with 
PKSampler + """ + logger.info("Test Cifar100Dataset with PKSampler") + golden = [i for i in range(20)] + sampler = ds.PKSampler(1) + data = ds.Cifar100Dataset(DATA_DIR_100, sampler=sampler) + num_iter = 0 + label_list = [] + for item in data.create_dict_iterator(): + label_list.append(item["coarse_label"]) + num_iter += 1 + np.testing.assert_array_equal(golden, label_list) + assert num_iter == 20 + + +def test_cifar100_exception(): + """ + Test error cases for Cifar100Dataset + """ + logger.info("Test error cases for Cifar100Dataset") + error_msg_1 = "sampler and shuffle cannot be specified at the same time" + with pytest.raises(RuntimeError, match=error_msg_1): + ds.Cifar100Dataset(DATA_DIR_100, shuffle=False, sampler=ds.PKSampler(3)) + + error_msg_2 = "sampler and sharding cannot be specified at the same time" + with pytest.raises(RuntimeError, match=error_msg_2): + ds.Cifar100Dataset(DATA_DIR_100, sampler=ds.PKSampler(3), num_shards=2, shard_id=0) + + error_msg_3 = "num_shards is specified and currently requires shard_id as well" + with pytest.raises(RuntimeError, match=error_msg_3): + ds.Cifar100Dataset(DATA_DIR_100, num_shards=10) + + error_msg_4 = "shard_id is specified but num_shards is not" + with pytest.raises(RuntimeError, match=error_msg_4): + ds.Cifar100Dataset(DATA_DIR_100, shard_id=0) + + error_msg_5 = "Input shard_id is not within the required interval" + with pytest.raises(ValueError, match=error_msg_5): + ds.Cifar100Dataset(DATA_DIR_100, num_shards=2, shard_id=-1) + with pytest.raises(ValueError, match=error_msg_5): + ds.Cifar10Dataset(DATA_DIR_100, num_shards=2, shard_id=5) + + error_msg_6 = "num_parallel_workers exceeds" + with pytest.raises(ValueError, match=error_msg_6): + ds.Cifar100Dataset(DATA_DIR_100, shuffle=False, num_parallel_workers=0) + with pytest.raises(ValueError, match=error_msg_6): + ds.Cifar100Dataset(DATA_DIR_100, shuffle=False, num_parallel_workers=88) + + +def test_cifar100_visualize(plot=False): + """ + Visualize 
Cifar100Dataset results + """ + logger.info("Test Cifar100Dataset visualization") + + data1 = ds.Cifar100Dataset(DATA_DIR_100, num_samples=10, shuffle=False) + num_iter = 0 + image_list, label_list = [], [] + for item in data1.create_dict_iterator(): + image = item["image"] + coarse_label = item["coarse_label"] + fine_label = item["fine_label"] + image_list.append(image) + label_list.append("coarse_label {}\nfine_label {}".format(coarse_label, fine_label)) + assert isinstance(image, np.ndarray) + assert image.shape == (32, 32, 3) + assert image.dtype == np.uint8 + assert coarse_label.dtype == np.uint32 + assert fine_label.dtype == np.uint32 + num_iter += 1 + assert num_iter == 10 + if plot: + visualize_dataset(image_list, label_list) + + +if __name__ == '__main__': + test_cifar10_content_check() + test_cifar10_basic() + test_cifar10_pk_sampler() + test_cifar10_sequential_sampler() + test_cifar10_exception() + test_cifar10_visualize(plot=False) + + test_cifar100_content_check() + test_cifar100_basic() + test_cifar100_pk_sampler() + test_cifar100_exception() + test_cifar100_visualize(plot=False) diff --git a/tests/ut/python/dataset/test_datasets_imagenet.py b/tests/ut/python/dataset/test_datasets_imagenet.py deleted file mode 100644 index a6e2afa65a6..00000000000 --- a/tests/ut/python/dataset/test_datasets_imagenet.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -import mindspore.dataset as ds -import mindspore.dataset.transforms.c_transforms as data_trans -import mindspore.dataset.transforms.vision.c_transforms as vision -from mindspore import log as logger - -DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] -SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" - - -def test_case_repeat(): - """ - a simple repeat operation. - """ - logger.info("Test Simple Repeat") - # define parameters - repeat_count = 2 - - # apply dataset operations - data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - data1 = data1.repeat(repeat_count) - - num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary - # in this example, each dictionary has keys "image" and "label" - logger.info("image is: {}".format(item["image"])) - logger.info("label is: {}".format(item["label"])) - num_iter += 1 - - logger.info("Number of data in data1: {}".format(num_iter)) - - -def test_case_shuffle(): - """ - a simple shuffle operation. 
- """ - logger.info("Test Simple Shuffle") - # define parameters - buffer_size = 8 - seed = 10 - - # apply dataset operations - data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - ds.config.set_seed(seed) - data1 = data1.shuffle(buffer_size=buffer_size) - - for item in data1.create_dict_iterator(): - logger.info("image is: {}".format(item["image"])) - logger.info("label is: {}".format(item["label"])) - - -def test_case_0(): - """ - Test Repeat then Shuffle - """ - logger.info("Test Repeat then Shuffle") - # define parameters - repeat_count = 2 - buffer_size = 7 - seed = 9 - - # apply dataset operations - data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - data1 = data1.repeat(repeat_count) - ds.config.set_seed(seed) - data1 = data1.shuffle(buffer_size=buffer_size) - - num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary - # in this example, each dictionary has keys "image" and "label" - logger.info("image is: {}".format(item["image"])) - logger.info("label is: {}".format(item["label"])) - num_iter += 1 - - logger.info("Number of data in data1: {}".format(num_iter)) - - -def test_case_0_reverse(): - """ - Test Shuffle then Repeat - """ - logger.info("Test Shuffle then Repeat") - # define parameters - repeat_count = 2 - buffer_size = 10 - seed = 9 - - # apply dataset operations - data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - ds.config.set_seed(seed) - data1 = data1.shuffle(buffer_size=buffer_size) - data1 = data1.repeat(repeat_count) - - num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary - # in this example, each dictionary has keys "image" and "label" - logger.info("image is: {}".format(item["image"])) - logger.info("label is: {}".format(item["label"])) - num_iter += 1 - - logger.info("Number of data in data1: {}".format(num_iter)) - - -def test_case_3(): - """ - Test Map - """ - logger.info("Test Map Rescale and Resize, then Shuffle") - data1 = 
ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - # define data augmentation parameters - rescale = 1.0 / 255.0 - shift = 0.0 - resize_height, resize_width = 224, 224 - - # define map operations - decode_op = vision.Decode() - rescale_op = vision.Rescale(rescale, shift) - # resize_op = vision.Resize(resize_height, resize_width, - # InterpolationMode.DE_INTER_LINEAR) # Bilinear mode - resize_op = vision.Resize((resize_height, resize_width)) - - # apply map operations on images - data1 = data1.map(input_columns=["image"], operations=decode_op) - data1 = data1.map(input_columns=["image"], operations=rescale_op) - data1 = data1.map(input_columns=["image"], operations=resize_op) - - # # apply ont-hot encoding on labels - num_classes = 4 - one_hot_encode = data_trans.OneHot(num_classes) # num_classes is input argument - data1 = data1.map(input_columns=["label"], operations=one_hot_encode) - # - # # apply Datasets - buffer_size = 100 - seed = 10 - batch_size = 2 - ds.config.set_seed(seed) - data1 = data1.shuffle(buffer_size=buffer_size) # 10000 as in imageNet train script - data1 = data1.batch(batch_size, drop_remainder=True) - - num_iter = 0 - for item in data1.create_dict_iterator(): # each data is a dictionary - # in this example, each dictionary has keys "image" and "label" - logger.info("image is: {}".format(item["image"])) - logger.info("label is: {}".format(item["label"])) - num_iter += 1 - - logger.info("Number of data in data1: {}".format(num_iter)) - - -if __name__ == '__main__': - logger.info('===========now test Repeat============') - # logger.info('Simple Repeat') - test_case_repeat() - logger.info('\n') - - logger.info('===========now test Shuffle===========') - # logger.info('Simple Shuffle') - test_case_shuffle() - logger.info('\n') - - # Note: cannot work with different shapes, hence not for image - # logger.info('===========now test Batch=============') - # # logger.info('Simple Batch') - # test_case_batch() - # logger.info('\n') - - 
logger.info('===========now test case 0============') - # logger.info('Repeat then Shuffle') - test_case_0() - logger.info('\n') - - logger.info('===========now test case 0 reverse============') - # # logger.info('Shuffle then Repeat') - test_case_0_reverse() - logger.info('\n') - - # logger.info('===========now test case 1============') - # # logger.info('Repeat with Batch') - # test_case_1() - # logger.info('\n') - - # logger.info('===========now test case 2============') - # # logger.info('Batch with Shuffle') - # test_case_2() - # logger.info('\n') - - # for image augmentation only - logger.info('===========now test case 3============') - logger.info('Map then Shuffle') - test_case_3() - logger.info('\n') diff --git a/tests/ut/python/dataset/test_datasets_imagenet_distribution.py b/tests/ut/python/dataset/test_datasets_imagenet_distribution.py deleted file mode 100644 index 92bdb68dc59..00000000000 --- a/tests/ut/python/dataset/test_datasets_imagenet_distribution.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -import mindspore.dataset as ds -from mindspore import log as logger - -DATA_DIR = ["../data/dataset/test_tf_file_3_images2/train-0000-of-0001.data", - "../data/dataset/test_tf_file_3_images2/train-0000-of-0002.data", - "../data/dataset/test_tf_file_3_images2/train-0000-of-0003.data", - "../data/dataset/test_tf_file_3_images2/train-0000-of-0004.data"] - -SCHEMA_DIR = "../data/dataset/test_tf_file_3_images2/datasetSchema.json" - - -def test_tf_file_normal(): - # apply dataset operations - data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - data1 = data1.repeat(1) - num_iter = 0 - for _ in data1.create_dict_iterator(): # each data is a dictionary - num_iter += 1 - - logger.info("Number of data in data1: {}".format(num_iter)) - assert num_iter == 12 - - -if __name__ == '__main__': - logger.info('=======test normal=======') - test_tf_file_normal() diff --git a/tests/ut/python/dataset/test_datasets_voc.py b/tests/ut/python/dataset/test_datasets_voc.py index 8db65e97340..37f4a8c1233 100644 --- a/tests/ut/python/dataset/test_datasets_voc.py +++ b/tests/ut/python/dataset/test_datasets_voc.py @@ -37,7 +37,7 @@ def test_voc_detection(): for item in data1.create_dict_iterator(): assert item["image"].shape[0] == IMAGE_SHAPE[num] for bbox in item["annotation"]: - count[bbox[0]] += 1 + count[int(bbox[6])] += 1 num += 1 assert num == 9 assert count == [3, 2, 1, 2, 4, 3] @@ -55,8 +55,8 @@ def test_voc_class_index(): count = [0, 0, 0, 0, 0, 0] for item in data1.create_dict_iterator(): for bbox in item["annotation"]: - assert (bbox[0] == 0 or bbox[0] == 1 or bbox[0] == 5) - count[bbox[0]] += 1 + assert (int(bbox[6]) == 0 or int(bbox[6]) == 1 or int(bbox[6]) == 5) + count[int(bbox[6])] += 1 num += 1 assert num == 6 assert count == [3, 2, 0, 0, 0, 3] @@ -73,8 +73,9 @@ def test_voc_get_class_indexing(): count = [0, 0, 0, 0, 0, 0] for item in data1.create_dict_iterator(): for bbox in 
item["annotation"]: - assert (bbox[0] == 0 or bbox[0] == 1 or bbox[0] == 2 or bbox[0] == 3 or bbox[0] == 4 or bbox[0] == 5) - count[bbox[0]] += 1 + assert (int(bbox[6]) == 0 or int(bbox[6]) == 1 or int(bbox[6]) == 2 or int(bbox[6]) == 3 + or int(bbox[6]) == 4 or int(bbox[6]) == 5) + count[int(bbox[6])] += 1 num += 1 assert num == 9 assert count == [3, 2, 1, 2, 4, 3] diff --git a/tests/ut/python/dataset/test_exceptions.py b/tests/ut/python/dataset/test_exceptions.py index cbfa402bb06..253eb564aeb 100644 --- a/tests/ut/python/dataset/test_exceptions.py +++ b/tests/ut/python/dataset/test_exceptions.py @@ -28,9 +28,9 @@ def test_exception_01(): """ logger.info("test_exception_01") data = ds.TFRecordDataset(DATA_DIR, columns_list=["image"]) - with pytest.raises(ValueError) as info: - data = data.map(input_columns=["image"], operations=vision.Resize(100, 100)) - assert "Invalid interpolation mode." in str(info.value) + with pytest.raises(TypeError) as info: + data.map(input_columns=["image"], operations=vision.Resize(100, 100)) + assert "Argument interpolation with value 100 is not of type (,)" in str(info.value) def test_exception_02(): @@ -40,8 +40,8 @@ def test_exception_02(): logger.info("test_exception_02") num_samples = -1 with pytest.raises(ValueError) as info: - data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples) - assert "num_samples cannot be less than 0" in str(info.value) + ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples) + assert 'Input num_samples is not within the required interval of (0 to 2147483647).' 
in str(info.value) num_samples = 1 data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples) diff --git a/tests/ut/python/dataset/test_fill_op.py b/tests/ut/python/dataset/test_fill_op.py index f138dd15ec9..657a5297235 100644 --- a/tests/ut/python/dataset/test_fill_op.py +++ b/tests/ut/python/dataset/test_fill_op.py @@ -82,9 +82,9 @@ def test_fillop_error_handling(): data = data.map(input_columns=["col"], operations=fill_op) with pytest.raises(RuntimeError) as error_info: - for data_row in data: - print(data_row) - assert "Types do not match" in repr(error_info.value) + for _ in data: + pass + assert "Types do not match" in str(error_info.value) if __name__ == "__main__": diff --git a/tests/ut/python/dataset/test_from_dataset.py b/tests/ut/python/dataset/test_from_dataset.py index 207a6be6a1a..983052ea08f 100644 --- a/tests/ut/python/dataset/test_from_dataset.py +++ b/tests/ut/python/dataset/test_from_dataset.py @@ -23,9 +23,10 @@ import mindspore.dataset.text as text def test_demo_basic_from_dataset(): """ this is a tutorial on how from_dataset should be used in a normal use case""" data = ds.TextFileDataset("../data/dataset/testVocab/words.txt", shuffle=False) - vocab = text.Vocab.from_dataset(data, "text", freq_range=None, top_k=None, special_tokens=["", ""], + vocab = text.Vocab.from_dataset(data, "text", freq_range=None, top_k=None, + special_tokens=["", ""], special_first=True) - data = data.map(input_columns=["text"], operations=text.Lookup(vocab)) + data = data.map(input_columns=["text"], operations=text.Lookup(vocab, "")) res = [] for d in data.create_dict_iterator(): res.append(d["text"].item()) @@ -38,7 +39,7 @@ def test_demo_basic_from_dataset_with_tokenizer(): data = data.map(input_columns=["text"], operations=text.UnicodeCharTokenizer()) vocab = text.Vocab.from_dataset(data, None, freq_range=None, top_k=None, special_tokens=["", ""], special_first=True) - data = data.map(input_columns=["text"], 
operations=text.Lookup(vocab)) + data = data.map(input_columns=["text"], operations=text.Lookup(vocab, "")) res = [] for d in data.create_dict_iterator(): res.append(list(d["text"])) @@ -59,7 +60,7 @@ def test_from_dataset(): corpus_dataset = ds.GeneratorDataset(gen_corpus, column_names=["text"]) vocab = text.Vocab.from_dataset(corpus_dataset, None, freq_range, top_k, special_tokens=["", ""], special_first=True) - corpus_dataset = corpus_dataset.map(input_columns="text", operations=text.Lookup(vocab)) + corpus_dataset = corpus_dataset.map(input_columns="text", operations=text.Lookup(vocab, "")) res = [] for d in corpus_dataset.create_dict_iterator(): res.append(list(d["text"])) @@ -107,7 +108,7 @@ def test_from_dataset_special_token(): corpus_dataset = ds.GeneratorDataset(gen_corpus, column_names=["text"]) vocab = text.Vocab.from_dataset(corpus_dataset, None, None, top_k, special_tokens, special_first) data = ds.GeneratorDataset(gen_input(texts), column_names=["text"]) - data = data.map(input_columns="text", operations=text.Lookup(vocab)) + data = data.map(input_columns="text", operations=text.Lookup(vocab, "")) res = [] for d in data.create_dict_iterator(): res.append(d["text"].item()) @@ -127,15 +128,16 @@ def test_from_dataset_exceptions(): data = ds.TextFileDataset("../data/dataset/testVocab/words.txt", shuffle=False) vocab = text.Vocab.from_dataset(data, columns, freq_range, top_k) assert isinstance(vocab.text.Vocab) - except ValueError as e: + except (TypeError, ValueError) as e: assert s in str(e), str(e) - test_config("text", (), 1, "freq_range needs to be either None or a tuple of 2 integers") - test_config("text", (2, 3), 1.2345, "top_k needs to be a positive integer") - test_config(23, (2, 3), 1.2345, "columns need to be a list of strings") - test_config("text", (100, 1), 12, "frequency range [a,b] should be 0 <= a <= b") - test_config("text", (2, 3), 0, "top_k needs to be a positive integer") - test_config([123], (2, 3), 0, "columns need to be a list of 
strings") + test_config("text", (), 1, "freq_range needs to be a tuple of 2 integers or an int and a None.") + test_config("text", (2, 3), 1.2345, + "Argument top_k with value 1.2345 is not of type (, )") + test_config(23, (2, 3), 1.2345, "Argument col_0 with value 23 is not of type (,)") + test_config("text", (100, 1), 12, "frequency range [a,b] should be 0 <= a <= b (a,b are inclusive)") + test_config("text", (2, 3), 0, "top_k must be greater than 0") + test_config([123], (2, 3), -1, "top_k must be greater than 0") if __name__ == '__main__': diff --git a/tests/ut/python/dataset/test_graphdata.py b/tests/ut/python/dataset/test_graphdata.py index 40833366233..0f78cfd03a8 100644 --- a/tests/ut/python/dataset/test_graphdata.py +++ b/tests/ut/python/dataset/test_graphdata.py @@ -23,6 +23,10 @@ SOCIAL_DATA_FILE = "../data/mindrecord/testGraphData/sns" def test_graphdata_getfullneighbor(): + """ + Test get all neighbors + """ + logger.info('test get all neighbors.\n') g = ds.GraphData(DATASET_FILE, 2) nodes = g.get_all_nodes(1) assert len(nodes) == 10 @@ -33,6 +37,10 @@ def test_graphdata_getfullneighbor(): def test_graphdata_getnodefeature_input_check(): + """ + Test get node feature input check + """ + logger.info('test getnodefeature input check.\n') g = ds.GraphData(DATASET_FILE) with pytest.raises(TypeError): input_list = [1, [1, 1]] @@ -80,6 +88,10 @@ def test_graphdata_getnodefeature_input_check(): def test_graphdata_getsampledneighbors(): + """ + Test sampled neighbors + """ + logger.info('test get sampled neighbors.\n') g = ds.GraphData(DATASET_FILE, 1) edges = g.get_all_edges(0) nodes = g.get_nodes_from_edges(edges) @@ -90,6 +102,10 @@ def test_graphdata_getsampledneighbors(): def test_graphdata_getnegsampledneighbors(): + """ + Test neg sampled neighbors + """ + logger.info('test get negative sampled neighbors.\n') g = ds.GraphData(DATASET_FILE, 2) nodes = g.get_all_nodes(1) assert len(nodes) == 10 @@ -98,6 +114,10 @@ def 
test_graphdata_getnegsampledneighbors(): def test_graphdata_graphinfo(): + """ + Test graph info + """ + logger.info('test graph info.\n') g = ds.GraphData(DATASET_FILE, 2) graph_info = g.graph_info() assert graph_info['node_type'] == [1, 2] @@ -105,7 +125,7 @@ def test_graphdata_graphinfo(): assert graph_info['node_num'] == {1: 10, 2: 10} assert graph_info['edge_num'] == {0: 40} assert graph_info['node_feature_type'] == [1, 2, 3, 4] - assert graph_info['edge_feature_type'] == [] + assert graph_info['edge_feature_type'] == [1, 2] class RandomBatchedSampler(ds.Sampler): @@ -155,6 +175,10 @@ class GNNGraphDataset(): def test_graphdata_generatordataset(): + """ + Test generator dataset + """ + logger.info('test generator dataset.\n') g = ds.GraphData(DATASET_FILE) batch_num = 2 edge_num = g.graph_info()['edge_num'][0] @@ -173,10 +197,13 @@ def test_graphdata_generatordataset(): assert i == 40 -def test_graphdata_randomwalk(): +def test_graphdata_randomwalkdefault(): + """ + Test random walk defaults + """ + logger.info('test randomwalk with default parameters.\n') g = ds.GraphData(SOCIAL_DATA_FILE, 1) nodes = g.get_all_nodes(1) - print(len(nodes)) assert len(nodes) == 33 meta_path = [1 for _ in range(39)] @@ -184,18 +211,39 @@ def test_graphdata_randomwalk(): assert walks.shape == (33, 40) +def test_graphdata_randomwalk(): + """ + Test random walk + """ + logger.info('test random walk with given parameters.\n') + g = ds.GraphData(SOCIAL_DATA_FILE, 1) + nodes = g.get_all_nodes(1) + assert len(nodes) == 33 + + meta_path = [1 for _ in range(39)] + walks = g.random_walk(nodes, meta_path, 2.0, 0.5, -1) + assert walks.shape == (33, 40) + + +def test_graphdata_getedgefeature(): + """ + Test get edge feature + """ + logger.info('test get_edge_feature.\n') + g = ds.GraphData(DATASET_FILE) + edges = g.get_all_edges(0) + features = g.get_edge_feature(edges, [1, 2]) + assert features[0].shape == (40,) + assert features[1].shape == (40,) + + if __name__ == '__main__': 
test_graphdata_getfullneighbor() - logger.info('test_graphdata_getfullneighbor Ended.\n') test_graphdata_getnodefeature_input_check() - logger.info('test_graphdata_getnodefeature_input_check Ended.\n') test_graphdata_getsampledneighbors() - logger.info('test_graphdata_getsampledneighbors Ended.\n') test_graphdata_getnegsampledneighbors() - logger.info('test_graphdata_getnegsampledneighbors Ended.\n') test_graphdata_graphinfo() - logger.info('test_graphdata_graphinfo Ended.\n') test_graphdata_generatordataset() - logger.info('test_graphdata_generatordataset Ended.\n') + test_graphdata_randomwalkdefault() test_graphdata_randomwalk() - logger.info('test_graphdata_randomwalk Ended.\n') + test_graphdata_getedgefeature() diff --git a/tests/ut/python/dataset/test_linear_transformation.py b/tests/ut/python/dataset/test_linear_transformation.py index 0dd25a4da1e..f932916ed83 100644 --- a/tests/ut/python/dataset/test_linear_transformation.py +++ b/tests/ut/python/dataset/test_linear_transformation.py @@ -73,6 +73,7 @@ def test_linear_transformation_op(plot=False): if plot: visualize_list(image, image_transformed) + def test_linear_transformation_md5(): """ Test LinearTransformation op: valid params (transformation_matrix, mean_vector) @@ -102,6 +103,7 @@ def test_linear_transformation_md5(): filename = "linear_transformation_01_result.npz" save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN) + def test_linear_transformation_exception_01(): """ Test LinearTransformation op: transformation_matrix is not provided @@ -126,9 +128,10 @@ def test_linear_transformation_exception_01(): ] transform = py_vision.ComposeOp(transforms) data1 = data1.map(input_columns=["image"], operations=transform()) - except ValueError as e: + except TypeError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "not provided" in str(e) + assert "Argument transformation_matrix with value None is not of type (,)" in str(e) + def 
test_linear_transformation_exception_02(): """ @@ -154,9 +157,10 @@ def test_linear_transformation_exception_02(): ] transform = py_vision.ComposeOp(transforms) data1 = data1.map(input_columns=["image"], operations=transform()) - except ValueError as e: + except TypeError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "not provided" in str(e) + assert "Argument mean_vector with value None is not of type (,)" in str(e) + def test_linear_transformation_exception_03(): """ @@ -187,6 +191,7 @@ def test_linear_transformation_exception_03(): logger.info("Got an exception in DE: {}".format(str(e))) assert "square matrix" in str(e) + def test_linear_transformation_exception_04(): """ Test LinearTransformation op: mean_vector does not match dimension of transformation_matrix @@ -199,7 +204,7 @@ def test_linear_transformation_exception_04(): weight = 50 dim = 3 * height * weight transformation_matrix = np.ones([dim, dim]) - mean_vector = np.zeros(dim-1) + mean_vector = np.zeros(dim - 1) # Generate dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) @@ -216,6 +221,7 @@ def test_linear_transformation_exception_04(): logger.info("Got an exception in DE: {}".format(str(e))) assert "should match" in str(e) + if __name__ == '__main__': test_linear_transformation_op(plot=True) test_linear_transformation_md5() diff --git a/tests/ut/python/dataset/test_minddataset_exception.py b/tests/ut/python/dataset/test_minddataset_exception.py index b15944d76b8..0b4d0dfc8fe 100644 --- a/tests/ut/python/dataset/test_minddataset_exception.py +++ b/tests/ut/python/dataset/test_minddataset_exception.py @@ -184,24 +184,26 @@ def test_minddataset_invalidate_num_shards(): create_cv_mindrecord(1) columns_list = ["data", "label"] num_readers = 4 - with pytest.raises(Exception, match="shard_id is invalid, "): + with pytest.raises(Exception) as error_info: data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, True, 1, 2) 
num_iter = 0 for _ in data_set.create_dict_iterator(): num_iter += 1 + assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info) + os.remove(CV_FILE_NAME) os.remove("{}.db".format(CV_FILE_NAME)) - def test_minddataset_invalidate_shard_id(): create_cv_mindrecord(1) columns_list = ["data", "label"] num_readers = 4 - with pytest.raises(Exception, match="shard_id is invalid, "): + with pytest.raises(Exception) as error_info: data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, True, 1, -1) num_iter = 0 for _ in data_set.create_dict_iterator(): num_iter += 1 + assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info) os.remove(CV_FILE_NAME) os.remove("{}.db".format(CV_FILE_NAME)) @@ -210,17 +212,19 @@ def test_minddataset_shard_id_bigger_than_num_shard(): create_cv_mindrecord(1) columns_list = ["data", "label"] num_readers = 4 - with pytest.raises(Exception, match="shard_id is invalid, "): + with pytest.raises(Exception) as error_info: data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, True, 2, 2) num_iter = 0 for _ in data_set.create_dict_iterator(): num_iter += 1 + assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info) - with pytest.raises(Exception, match="shard_id is invalid, "): + with pytest.raises(Exception) as error_info: data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, True, 2, 5) num_iter = 0 for _ in data_set.create_dict_iterator(): num_iter += 1 + assert 'Input shard_id is not within the required interval of (0 to 1).' 
in str(error_info) os.remove(CV_FILE_NAME) os.remove("{}.db".format(CV_FILE_NAME)) diff --git a/tests/ut/python/dataset/test_ngram_op.py b/tests/ut/python/dataset/test_ngram_op.py index 73b2702378e..777fca8764a 100644 --- a/tests/ut/python/dataset/test_ngram_op.py +++ b/tests/ut/python/dataset/test_ngram_op.py @@ -15,9 +15,9 @@ """ Testing Ngram in mindspore.dataset """ +import numpy as np import mindspore.dataset as ds import mindspore.dataset.text as text -import numpy as np def test_multiple_ngrams(): @@ -61,7 +61,7 @@ def test_simple_ngram(): yield (np.array(line.split(" "), dtype='S'),) dataset = ds.GeneratorDataset(gen(plates_mottos), column_names=["text"]) - dataset = dataset.map(input_columns=["text"], operations=text.Ngram(3, separator=None)) + dataset = dataset.map(input_columns=["text"], operations=text.Ngram(3, separator=" ")) i = 0 for data in dataset.create_dict_iterator(): @@ -72,43 +72,36 @@ def test_simple_ngram(): def test_corner_cases(): """ testing various corner cases and exceptions""" - def test_config(input_line, output_line, n, l_pad=None, r_pad=None, sep=None): + def test_config(input_line, n, l_pad=("", 0), r_pad=("", 0), sep=" "): def gen(texts): yield (np.array(texts.split(" "), dtype='S'),) - dataset = ds.GeneratorDataset(gen(input_line), column_names=["text"]) - dataset = dataset.map(input_columns=["text"], operations=text.Ngram(n, l_pad, r_pad, separator=sep)) - for data in dataset.create_dict_iterator(): - assert [d.decode("utf8") for d in data["text"]] == output_line, output_line + try: + dataset = ds.GeneratorDataset(gen(input_line), column_names=["text"]) + dataset = dataset.map(input_columns=["text"], operations=text.Ngram(n, l_pad, r_pad, separator=sep)) + for data in dataset.create_dict_iterator(): + return [d.decode("utf8") for d in data["text"]] + except (ValueError, TypeError) as e: + return str(e) # test tensor length smaller than n - test_config("Lone Star", ["Lone Star", "", "", ""], [2, 3, 4, 5]) + assert 
test_config("Lone Star", [2, 3, 4, 5]) == ["Lone Star", "", "", ""] # test empty separator - test_config("Beautiful British Columbia", ['BeautifulBritish', 'BritishColumbia'], 2, sep="") + assert test_config("Beautiful British Columbia", 2, sep="") == ['BeautifulBritish', 'BritishColumbia'] # test separator with longer length - test_config("Beautiful British Columbia", ['Beautiful^-^British^-^Columbia'], 3, sep="^-^") + assert test_config("Beautiful British Columbia", 3, sep="^-^") == ['Beautiful^-^British^-^Columbia'] # test left pad != right pad - test_config("Lone Star", ['The Lone Star State'], 4, ("The", 1), ("State", 1)) + assert test_config("Lone Star", 4, ("The", 1), ("State", 1)) == ['The Lone Star State'] # test invalid n - try: - test_config("Yours to Discover", "", [0, [1]]) - except Exception as e: - assert "ngram needs to be a positive number" in str(e) - # test empty n - try: - test_config("Yours to Discover", "", []) - except Exception as e: - assert "n needs to be a non-empty list" in str(e) + assert "gram[1] with value [1] is not of type (,)" in test_config("Yours to Discover", [1, [1]]) + assert "n needs to be a non-empty list" in test_config("Yours to Discover", []) # test invalid pad - try: - test_config("Yours to Discover", "", [1], ("str", -1)) - except Exception as e: - assert "padding width need to be positive numbers" in str(e) - # test invalid pad - try: - test_config("Yours to Discover", "", [1], ("str", "rts")) - except Exception as e: - assert "pad needs to be a tuple of (str, int)" in str(e) + assert "padding width need to be positive numbers" in test_config("Yours to Discover", [1], ("str", -1)) + assert "pad needs to be a tuple of (str, int)" in test_config("Yours to Discover", [1], ("str", "rts")) + # test 0 as in valid input + assert "gram_0 must be greater than 0" in test_config("Yours to Discover", 0) + assert "gram_0 must be greater than 0" in test_config("Yours to Discover", [0]) + assert "gram_1 must be greater than 0" in 
test_config("Yours to Discover", [1, 0]) if __name__ == '__main__': diff --git a/tests/ut/python/dataset/test_nlp.py b/tests/ut/python/dataset/test_nlp.py index 6b44cfc80bb..cb517160a19 100644 --- a/tests/ut/python/dataset/test_nlp.py +++ b/tests/ut/python/dataset/test_nlp.py @@ -34,13 +34,32 @@ def test_on_tokenized_line(): jieba_op.add_word(word) data = data.map(input_columns=["text"], operations=jieba_op) vocab = text.Vocab.from_file(VOCAB_FILE, ",", special_tokens=["", ""]) - lookup = text.Lookup(vocab) + lookup = text.Lookup(vocab, "") data = data.map(input_columns=["text"], operations=lookup) res = np.array([[10, 1, 11, 1, 12, 1, 15, 1, 13, 1, 14], [11, 1, 12, 1, 10, 1, 14, 1, 13, 1, 15]], dtype=np.int32) for i, d in enumerate(data.create_dict_iterator()): - _ = (np.testing.assert_array_equal(d["text"], res[i]), i) + np.testing.assert_array_equal(d["text"], res[i]) + + +def test_on_tokenized_line_with_no_special_tokens(): + data = ds.TextFileDataset("../data/dataset/testVocab/lines.txt", shuffle=False) + jieba_op = text.JiebaTokenizer(HMM_FILE, MP_FILE, mode=text.JiebaMode.MP) + with open(VOCAB_FILE, 'r') as f: + for line in f: + word = line.split(',')[0] + jieba_op.add_word(word) + + data = data.map(input_columns=["text"], operations=jieba_op) + vocab = text.Vocab.from_file(VOCAB_FILE, ",") + lookup = text.Lookup(vocab, "not") + data = data.map(input_columns=["text"], operations=lookup) + res = np.array([[8, 0, 9, 0, 10, 0, 13, 0, 11, 0, 12], + [9, 0, 10, 0, 8, 0, 12, 0, 11, 0, 13]], dtype=np.int32) + for i, d in enumerate(data.create_dict_iterator()): + np.testing.assert_array_equal(d["text"], res[i]) if __name__ == '__main__': test_on_tokenized_line() + test_on_tokenized_line_with_no_special_tokens() diff --git a/tests/ut/python/dataset/test_nlp_jieop.py b/tests/ut/python/dataset/test_nlp_jieop.py deleted file mode 100644 index 1ab53205d08..00000000000 --- a/tests/ut/python/dataset/test_nlp_jieop.py +++ /dev/null @@ -1,238 +0,0 @@ -# Copyright 2020 Huawei 
Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -import numpy as np -import mindspore.dataset as ds -from mindspore.dataset.text import JiebaTokenizer -from mindspore.dataset.text import JiebaMode, to_str - -DATA_FILE = "../data/dataset/testJiebaDataset/3.txt" -DATA_ALL_FILE = "../data/dataset/testJiebaDataset/*" - -HMM_FILE = "../data/dataset/jiebadict/hmm_model.utf8" -MP_FILE = "../data/dataset/jiebadict/jieba.dict.utf8" - - -def test_jieba_1(): - """Test jieba tokenizer with MP mode""" - data = ds.TextFileDataset(DATA_FILE) - jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=1) - expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] - ret = [] - for i in data.create_dict_iterator(): - ret = to_str(i["text"]) - for index, item in enumerate(ret): - assert item == expect[index] - - -def test_jieba_1_1(): - """Test jieba tokenizer with HMM mode""" - data = ds.TextFileDataset(DATA_FILE) - jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.HMM) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=1) - expect = ['今天', '天气', '太', '好', '了', '我们', '一起', '去', '外面', '玩', '吧'] - for i in data.create_dict_iterator(): - ret = to_str(i["text"]) - for index, item in enumerate(ret): - assert item == expect[index] - - -def test_jieba_1_2(): - 
"""Test jieba tokenizer with HMM MIX""" - data = ds.TextFileDataset(DATA_FILE) - jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MIX) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=1) - expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] - for i in data.create_dict_iterator(): - ret = to_str(i["text"]) - for index, item in enumerate(ret): - assert item == expect[index] - - -def test_jieba_2(): - """Test add_word""" - DATA_FILE4 = "../data/dataset/testJiebaDataset/4.txt" - data = ds.TextFileDataset(DATA_FILE4) - jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) - jieba_op.add_word("男默女泪") - expect = ['男默女泪', '市', '长江大桥'] - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=2) - for i in data.create_dict_iterator(): - ret = to_str(i["text"]) - for index, item in enumerate(ret): - assert item == expect[index] - - -def test_jieba_2_1(): - """Test add_word with freq""" - DATA_FILE4 = "../data/dataset/testJiebaDataset/4.txt" - data = ds.TextFileDataset(DATA_FILE4) - jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) - jieba_op.add_word("男默女泪", 10) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=2) - expect = ['男默女泪', '市', '长江大桥'] - for i in data.create_dict_iterator(): - ret = to_str(i["text"]) - for index, item in enumerate(ret): - assert item == expect[index] - - -def test_jieba_2_2(): - """Test add_word with invalid None Input""" - jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) - try: - jieba_op.add_word(None) - except ValueError: - pass - - -def test_jieba_2_3(): - """Test add_word with freq, the value of freq affects the result of segmentation""" - DATA_FILE4 = "../data/dataset/testJiebaDataset/6.txt" - data = ds.TextFileDataset(DATA_FILE4) - jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) - jieba_op.add_word("江大桥", 20000) - data = data.map(input_columns=["text"], - 
operations=jieba_op, num_parallel_workers=2) - expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] - for i in data.create_dict_iterator(): - ret = to_str(i["text"]) - for index, item in enumerate(ret): - assert item == expect[index] - - -def test_jieba_3(): - """Test add_dict with dict""" - DATA_FILE4 = "../data/dataset/testJiebaDataset/4.txt" - user_dict = { - "男默女泪": 10 - } - data = ds.TextFileDataset(DATA_FILE4) - jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) - jieba_op.add_dict(user_dict) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=1) - expect = ['男默女泪', '市', '长江大桥'] - for i in data.create_dict_iterator(): - ret = to_str(i["text"]) - for index, item in enumerate(ret): - assert item == expect[index] - - -def test_jieba_3_1(): - """Test add_dict with dict""" - DATA_FILE4 = "../data/dataset/testJiebaDataset/4.txt" - user_dict = { - "男默女泪": 10, - "江大桥": 20000 - } - data = ds.TextFileDataset(DATA_FILE4) - jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) - jieba_op.add_dict(user_dict) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=1) - expect = ['男默女泪', '市长', '江大桥'] - for i in data.create_dict_iterator(): - ret = to_str(i["text"]) - for index, item in enumerate(ret): - assert item == expect[index] - - -def test_jieba_4(): - DATA_FILE4 = "../data/dataset/testJiebaDataset/3.txt" - DICT_FILE = "../data/dataset/testJiebaDataset/user_dict.txt" - - data = ds.TextFileDataset(DATA_FILE4) - jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) - jieba_op.add_dict(DICT_FILE) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=1) - expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] - for i in data.create_dict_iterator(): - ret = to_str(i["text"]) - for index, item in enumerate(ret): - assert item == expect[index] - - -def test_jieba_4_1(): - """Test add dict with invalid file path""" - DICT_FILE = "" - 
jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) - try: - jieba_op.add_dict(DICT_FILE) - except ValueError: - pass - - -def test_jieba_5(): - """Test add dict with file path""" - DATA_FILE4 = "../data/dataset/testJiebaDataset/6.txt" - - data = ds.TextFileDataset(DATA_FILE4) - jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) - jieba_op.add_word("江大桥", 20000) - data = data.map(input_columns=["text"], - operations=jieba_op, num_parallel_workers=1) - expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] - for i in data.create_dict_iterator(): - ret = to_str(i["text"]) - for index, item in enumerate(ret): - assert item == expect[index] - - -def gen(): - text = np.array("今天天气太好了我们一起去外面玩吧".encode("UTF8"), dtype='S') - yield (text,) - - -def pytoken_op(input_data): - te = str(to_str(input_data)) - tokens = [] - tokens.append(te[:5].encode("UTF8")) - tokens.append(te[5:10].encode("UTF8")) - tokens.append(te[10:].encode("UTF8")) - return np.array(tokens, dtype='S') - - -def test_jieba_6(): - data = ds.GeneratorDataset(gen, column_names=["text"]) - data = data.map(input_columns=["text"], - operations=pytoken_op, num_parallel_workers=1) - expect = ['今天天气太', '好了我们一', '起去外面玩吧'] - for i in data.create_dict_iterator(): - ret = to_str(i["text"]) - for index, item in enumerate(ret): - assert item == expect[index] - - -if __name__ == "__main__": - test_jieba_1() - test_jieba_1_1() - test_jieba_1_2() - test_jieba_2() - test_jieba_2_1() - test_jieba_2_2() - test_jieba_3() - test_jieba_3_1() - test_jieba_4() - test_jieba_4_1() - test_jieba_5() - test_jieba_5() - test_jieba_6() diff --git a/tests/ut/python/dataset/test_noop_mode.py b/tests/ut/python/dataset/test_noop_mode.py new file mode 100644 index 00000000000..0ea96732001 --- /dev/null +++ b/tests/ut/python/dataset/test_noop_mode.py @@ -0,0 +1,45 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file 
except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +Test No-op mode support with Dummy Iterator +""" +import os +import mindspore.dataset as ds + +DATA_DIR = "../data/dataset/testVOC2012" + +def test_noop_pserver(): + os.environ['MS_ROLE'] = 'MS_PSERVER' + data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", mode="train", decode=True, shuffle=False) + num = 0 + for _ in data1.create_dict_iterator(): + num += 1 + assert num == 0 + del os.environ['MS_ROLE'] + + +def test_noop_sched(): + os.environ['MS_ROLE'] = 'MS_SCHED' + data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", mode="train", decode=True, shuffle=False) + num = 0 + for _ in data1.create_dict_iterator(): + num += 1 + assert num == 0 + del os.environ['MS_ROLE'] + + +if __name__ == '__main__': + test_noop_pserver() + test_noop_sched() diff --git a/tests/ut/python/dataset/test_normalizeOp.py b/tests/ut/python/dataset/test_normalizeOp.py index af97ee0c088..d5ebc799f91 100644 --- a/tests/ut/python/dataset/test_normalizeOp.py +++ b/tests/ut/python/dataset/test_normalizeOp.py @@ -279,7 +279,7 @@ def test_normalize_exception_invalid_range_py(): _ = py_vision.Normalize([0.75, 1.25, 0.5], [0.1, 0.18, 1.32]) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "Input is not within the required range" in str(e) + assert "Input mean_value is not within the required interval of (0.0 to 1.0)." 
in str(e) def test_normalize_grayscale_md5_01(): diff --git a/tests/ut/python/dataset/test_onehot_op.py b/tests/ut/python/dataset/test_onehot_op.py index 500f770b9bb..44d98b0ae0a 100644 --- a/tests/ut/python/dataset/test_onehot_op.py +++ b/tests/ut/python/dataset/test_onehot_op.py @@ -13,12 +13,13 @@ # limitations under the License. # ============================================================================== """ -Testing the one_hot op in DE +Testing the OneHot Op """ import numpy as np import mindspore.dataset as ds import mindspore.dataset.transforms.c_transforms as data_trans +import mindspore.dataset.transforms.vision.c_transforms as c_vision from mindspore import log as logger from util import diff_mse @@ -37,15 +38,15 @@ def one_hot(index, depth): def test_one_hot(): """ - Test one_hot + Test OneHot Tensor Operator """ - logger.info("Test one_hot") + logger.info("test_one_hot") depth = 10 # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) - one_hot_op = data_trans.OneHot(depth) + one_hot_op = data_trans.OneHot(num_classes=depth) data1 = data1.map(input_columns=["label"], operations=one_hot_op, columns_order=["label"]) # Second dataset @@ -58,8 +59,54 @@ def test_one_hot(): label2 = one_hot(item2["label"][0], depth) mse = diff_mse(label1, label2) logger.info("DE one_hot: {}, Numpy one_hot: {}, diff: {}".format(label1, label2, mse)) + assert mse == 0 num_iter += 1 + assert num_iter == 3 + +def test_one_hot_post_aug(): + """ + Test One Hot Encoding after Multiple Data Augmentation Operators + """ + logger.info("test_one_hot_post_aug") + data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) + + # Define data augmentation parameters + rescale = 1.0 / 255.0 + shift = 0.0 + resize_height, resize_width = 224, 224 + + # Define map operations + decode_op = c_vision.Decode() + rescale_op = c_vision.Rescale(rescale, shift) + resize_op = c_vision.Resize((resize_height, resize_width)) + + # Apply map operations on images + data1 = 
data1.map(input_columns=["image"], operations=decode_op) + data1 = data1.map(input_columns=["image"], operations=rescale_op) + data1 = data1.map(input_columns=["image"], operations=resize_op) + + # Apply one-hot encoding on labels + depth = 4 + one_hot_encode = data_trans.OneHot(depth) + data1 = data1.map(input_columns=["label"], operations=one_hot_encode) + + # Apply datasets ops + buffer_size = 100 + seed = 10 + batch_size = 2 + ds.config.set_seed(seed) + data1 = data1.shuffle(buffer_size=buffer_size) + data1 = data1.batch(batch_size, drop_remainder=True) + + num_iter = 0 + for item in data1.create_dict_iterator(): + logger.info("image is: {}".format(item["image"])) + logger.info("label is: {}".format(item["label"])) + num_iter += 1 + + assert num_iter == 1 if __name__ == "__main__": test_one_hot() + test_one_hot_post_aug() diff --git a/tests/ut/python/dataset/test_pad_end_op.py b/tests/ut/python/dataset/test_pad_end_op.py index 5742d736659..c25d6b9a95b 100644 --- a/tests/ut/python/dataset/test_pad_end_op.py +++ b/tests/ut/python/dataset/test_pad_end_op.py @@ -61,6 +61,10 @@ def test_pad_end_exceptions(): pad_compare([3, 4, 5], ["2"], 1, []) assert "a value in the list is not an integer." in str(info.value) + with pytest.raises(TypeError) as info: + pad_compare([1, 2], 3, -1, [1, 2, -1]) + assert "Argument pad_end with value 3 is not of type (,)" in str(info.value) + if __name__ == "__main__": test_pad_end_basics() diff --git a/tests/ut/python/dataset/test_random_affine.py b/tests/ut/python/dataset/test_random_affine.py index b856684ed13..ec829eb53a7 100644 --- a/tests/ut/python/dataset/test_random_affine.py +++ b/tests/ut/python/dataset/test_random_affine.py @@ -103,7 +103,7 @@ def test_random_affine_exception_negative_degrees(): _ = py_vision.RandomAffine(degrees=-15) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert str(e) == "If degrees is a single number, it cannot be negative." 
+ assert str(e) == "Input degrees is not within the required interval of (0 to inf)." def test_random_affine_exception_translation_range(): @@ -115,7 +115,7 @@ def test_random_affine_exception_translation_range(): _ = py_vision.RandomAffine(degrees=15, translate=(0.1, 1.5)) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert str(e) == "translation values should be between 0 and 1" + assert str(e) == "Input translate at 1 is not within the required interval of (0.0 to 1.0)." def test_random_affine_exception_scale_value(): @@ -127,7 +127,7 @@ def test_random_affine_exception_scale_value(): _ = py_vision.RandomAffine(degrees=15, scale=(0.0, 1.1)) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert str(e) == "scale values should be positive" + assert str(e) == "Input scale[0] must be greater than 0." def test_random_affine_exception_shear_value(): @@ -139,7 +139,7 @@ def test_random_affine_exception_shear_value(): _ = py_vision.RandomAffine(degrees=15, shear=-5) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert str(e) == "If shear is a single number, it must be positive." + assert str(e) == "Input shear must be greater than 0." def test_random_affine_exception_degrees_size(): @@ -165,7 +165,9 @@ def test_random_affine_exception_translate_size(): _ = py_vision.RandomAffine(degrees=15, translate=(0.1)) except TypeError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert str(e) == "translate should be a list or tuple of length 2." + assert str( + e) == "Argument translate with value 0.1 is not of type (," \ + " )." def test_random_affine_exception_scale_size(): @@ -178,7 +180,8 @@ def test_random_affine_exception_scale_size(): _ = py_vision.RandomAffine(degrees=15, scale=(0.5)) except TypeError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert str(e) == "scale should be a list or tuple of length 2." 
+ assert str(e) == "Argument scale with value 0.5 is not of type (," \ + " )." def test_random_affine_exception_shear_size(): @@ -191,7 +194,7 @@ def test_random_affine_exception_shear_size(): _ = py_vision.RandomAffine(degrees=15, shear=(-5, 5, 10)) except TypeError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert str(e) == "shear should be a list or tuple and it must be of length 2 or 4." + assert str(e) == "shear must be of length 2 or 4." if __name__ == "__main__": diff --git a/tests/ut/python/dataset/test_random_color.py b/tests/ut/python/dataset/test_random_color.py index 45847ba6534..0015e8498f6 100644 --- a/tests/ut/python/dataset/test_random_color.py +++ b/tests/ut/python/dataset/test_random_color.py @@ -97,7 +97,7 @@ def test_random_color_md5(): data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) transforms = F.ComposeOp([F.Decode(), - F.RandomColor((0.5, 1.5)), + F.RandomColor((0.1, 1.9)), F.ToTensor()]) data = data.map(input_columns="image", operations=transforms()) diff --git a/tests/ut/python/dataset/test_random_crop_and_resize.py b/tests/ut/python/dataset/test_random_crop_and_resize.py index de039e6d82e..486d2cd5ed1 100644 --- a/tests/ut/python/dataset/test_random_crop_and_resize.py +++ b/tests/ut/python/dataset/test_random_crop_and_resize.py @@ -232,7 +232,7 @@ def test_random_crop_and_resize_04_c(): data = data.map(input_columns=["image"], operations=random_crop_and_resize_op) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "Input range is not valid" in str(e) + assert "Input is not within the required interval of (0 to 16777216)." 
in str(e) def test_random_crop_and_resize_04_py(): @@ -255,7 +255,7 @@ def test_random_crop_and_resize_04_py(): data = data.map(input_columns=["image"], operations=transform()) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "Input range is not valid" in str(e) + assert "Input is not within the required interval of (0 to 16777216)." in str(e) def test_random_crop_and_resize_05_c(): @@ -275,7 +275,7 @@ def test_random_crop_and_resize_05_c(): data = data.map(input_columns=["image"], operations=random_crop_and_resize_op) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "Input range is not valid" in str(e) + assert "Input is not within the required interval of (0 to 16777216)." in str(e) def test_random_crop_and_resize_05_py(): @@ -298,7 +298,7 @@ def test_random_crop_and_resize_05_py(): data = data.map(input_columns=["image"], operations=transform()) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "Input range is not valid" in str(e) + assert "Input is not within the required interval of (0 to 16777216)." in str(e) def test_random_crop_and_resize_comp(plot=False): diff --git a/tests/ut/python/dataset/test_random_crop_and_resize_with_bbox.py b/tests/ut/python/dataset/test_random_crop_and_resize_with_bbox.py index b13dc466f72..599acc95609 100644 --- a/tests/ut/python/dataset/test_random_crop_and_resize_with_bbox.py +++ b/tests/ut/python/dataset/test_random_crop_and_resize_with_bbox.py @@ -25,34 +25,16 @@ from util import visualize_with_bounding_boxes, InvalidBBoxType, check_bad_bbox, GENERATE_GOLDEN = False -# updated VOC dataset with correct annotations -DATA_DIR = "../data/dataset/testVOC2012_2" - - -def fix_annotate(bboxes): - """ - Fix annotations to format followed by mindspore. 
- :param bboxes: in [label, x_min, y_min, w, h, truncate, difficult] format - :return: annotation in [x_min, y_min, w, h, label, truncate, difficult] format - """ - for bbox in bboxes: - if bbox.size == 7: - tmp = bbox[0] - bbox[0] = bbox[1] - bbox[1] = bbox[2] - bbox[2] = bbox[3] - bbox[3] = bbox[4] - bbox[4] = tmp - else: - print("ERROR: Invalid Bounding Box size provided") - break - return bboxes +# Updated VOC dataset with correct annotations - DATA_DIR +DATA_DIR_VOC = "../data/dataset/testVOC2012_2" +# COCO dataset - DATA_DIR, ANNOTATION_DIR +DATA_DIR_COCO = ["../data/dataset/testCOCO/train/", "../data/dataset/testCOCO/annotations/train.json"] def test_random_resized_crop_with_bbox_op_c(plot_vis=False): """ - Prints images and bboxes side by side with and without RandomResizedCropWithBBox Op applied, - tests with MD5 check, expected to pass + Prints images and bboxes side by side with and without RandomResizedCropWithBBox Op applied, + tests with MD5 check, expected to pass """ logger.info("test_random_resized_crop_with_bbox_op_c") @@ -60,22 +42,16 @@ def test_random_resized_crop_with_bbox_op_c(plot_vis=False): original_num_parallel_workers = config_get_set_num_parallel_workers(1) # Load dataset - dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc1 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) test_op = c_vision.RandomResizedCropWithBBox((256, 512), (0.5, 0.5), (0.5, 0.5)) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", 
"annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], - operations=[test_op]) # Add column for "annotation" + operations=[test_op]) filename = "random_resized_crop_with_bbox_01_c_result.npz" save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) @@ -94,26 +70,49 @@ def test_random_resized_crop_with_bbox_op_c(plot_vis=False): ds.config.set_num_parallel_workers(original_num_parallel_workers) +def test_random_resized_crop_with_bbox_op_coco_c(plot_vis=False): + """ + Prints images and bboxes side by side with and without RandomResizedCropWithBBox Op applied, + Testing with Coco dataset + """ + logger.info("test_random_resized_crop_with_bbox_op_coco_c") + # load dataset + dataCoco1 = ds.CocoDataset(DATA_DIR_COCO[0], annotation_file=DATA_DIR_COCO[1], task="Detection", + decode=True, shuffle=False) + + dataCoco2 = ds.CocoDataset(DATA_DIR_COCO[0], annotation_file=DATA_DIR_COCO[1], task="Detection", + decode=True, shuffle=False) + + test_op = c_vision.RandomResizedCropWithBBox((512, 512), (0.5, 1), (0.5, 1)) + + dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + columns_order=["image", "bbox"], + operations=[test_op]) + + unaugSamp, augSamp = [], [] + + for unAug, Aug in zip(dataCoco1.create_dict_iterator(), dataCoco2.create_dict_iterator()): + unaugSamp.append(unAug) + augSamp.append(Aug) + + if plot_vis: + visualize_with_bounding_boxes(unaugSamp, augSamp, "bbox") + + def test_random_resized_crop_with_bbox_op_edge_c(plot_vis=False): """ - Prints images and bboxes side by side with and without RandomResizedCropWithBBox Op applied, - tests on dynamically generated edge case, expected to pass + Prints images and bboxes side by side with and without RandomResizedCropWithBBox Op applied, + tests on dynamically generated edge case, expected to pass """ logger.info("test_random_resized_crop_with_bbox_op_edge_c") # Load dataset - dataVoc1 = ds.VOCDataset(DATA_DIR, 
task="Detection", mode="train", decode=True, shuffle=False) - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc1 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) test_op = c_vision.RandomResizedCropWithBBox((256, 512), (0.5, 0.5), (0.5, 0.5)) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - # maps to convert data into valid edge case data dataVoc1 = dataVoc1.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -138,20 +137,17 @@ def test_random_resized_crop_with_bbox_op_edge_c(plot_vis=False): def test_random_resized_crop_with_bbox_op_invalid_c(): """ - Tests RandomResizedCropWithBBox on invalid constructor parameters, expected to raise ValueError + Tests RandomResizedCropWithBBox on invalid constructor parameters, expected to raise ValueError """ logger.info("test_random_resized_crop_with_bbox_op_invalid_c") # Load dataset, only Augmented Dataset as test will raise ValueError - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) try: # If input range of scale is not in the order of (min, max), ValueError will be raised. 
test_op = c_vision.RandomResizedCropWithBBox((256, 512), (1, 0.5), (0.5, 0.5)) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -163,7 +159,7 @@ def test_random_resized_crop_with_bbox_op_invalid_c(): except ValueError as err: logger.info("Got an exception in DE: {}".format(str(err))) - assert "Input range is not valid" in str(err) + assert "Input is not within the required interval of (0 to 16777216)." in str(err) def test_random_resized_crop_with_bbox_op_invalid2_c(): @@ -172,15 +168,12 @@ def test_random_resized_crop_with_bbox_op_invalid2_c(): """ logger.info("test_random_resized_crop_with_bbox_op_invalid2_c") # Load dataset # only loading the to AugDataset as test will fail on this - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) try: # If input range of ratio is not in the order of (min, max), ValueError will be raised. test_op = c_vision.RandomResizedCropWithBBox((256, 512), (1, 1), (1, 0.5)) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -192,7 +185,7 @@ def test_random_resized_crop_with_bbox_op_invalid2_c(): except ValueError as err: logger.info("Got an exception in DE: {}".format(str(err))) - assert "Input range is not valid" in str(err) + assert "Input is not within the required interval of (0 to 16777216)." 
in str(err) def test_random_resized_crop_with_bbox_op_bad_c(): @@ -202,18 +195,19 @@ def test_random_resized_crop_with_bbox_op_bad_c(): logger.info("test_random_resized_crop_with_bbox_op_bad_c") test_op = c_vision.RandomResizedCropWithBBox((256, 512), (0.5, 0.5), (0.5, 0.5)) - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) check_bad_bbox(data_voc2, test_op, InvalidBBoxType.WidthOverflow, "bounding boxes is out of bounds of the image") - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) check_bad_bbox(data_voc2, test_op, InvalidBBoxType.HeightOverflow, "bounding boxes is out of bounds of the image") - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) check_bad_bbox(data_voc2, test_op, InvalidBBoxType.NegativeXY, "min_x") - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) check_bad_bbox(data_voc2, test_op, InvalidBBoxType.WrongShape, "4 features") if __name__ == "__main__": test_random_resized_crop_with_bbox_op_c(plot_vis=True) + test_random_resized_crop_with_bbox_op_coco_c(plot_vis=True) test_random_resized_crop_with_bbox_op_edge_c(plot_vis=True) test_random_resized_crop_with_bbox_op_invalid_c() test_random_resized_crop_with_bbox_op_invalid2_c() diff --git a/tests/ut/python/dataset/test_random_crop_with_bbox.py b/tests/ut/python/dataset/test_random_crop_with_bbox.py index 9262dfd65de..b93c638f41d 100644 --- a/tests/ut/python/dataset/test_random_crop_with_bbox.py +++ 
b/tests/ut/python/dataset/test_random_crop_with_bbox.py @@ -26,49 +26,25 @@ from util import visualize_with_bounding_boxes, InvalidBBoxType, check_bad_bbox, GENERATE_GOLDEN = False -# updated VOC dataset with correct annotations -DATA_DIR = "../data/dataset/testVOC2012_2" - - -def fix_annotate(bboxes): - """ - Fix annotations to format followed by mindspore. - :param bboxes: in [label, x_min, y_min, w, h, truncate, difficult] format - :return: annotation in [x_min, y_min, w, h, label, truncate, difficult] format - """ - for bbox in bboxes: - if bbox.size == 7: - tmp = bbox[0] - bbox[0] = bbox[1] - bbox[1] = bbox[2] - bbox[2] = bbox[3] - bbox[3] = bbox[4] - bbox[4] = tmp - else: - print("ERROR: Invalid Bounding Box size provided") - break - return bboxes +# Updated VOC dataset with correct annotations - DATA_DIR +DATA_DIR_VOC = "../data/dataset/testVOC2012_2" +# COCO dataset - DATA_DIR, ANNOTATION_DIR +DATA_DIR_COCO = ["../data/dataset/testCOCO/train/", "../data/dataset/testCOCO/annotations/train.json"] def test_random_crop_with_bbox_op_c(plot_vis=False): """ - Prints images and bboxes side by side with and without RandomCropWithBBox Op applied + Prints images and bboxes side by side with and without RandomCropWithBBox Op applied """ logger.info("test_random_crop_with_bbox_op_c") # Load dataset - dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc1 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) # define test OP with values to match existing Op UT test_op = c_vision.RandomCropWithBBox([512, 512], [200, 200, 200, 200]) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = 
dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -85,33 +61,57 @@ def test_random_crop_with_bbox_op_c(plot_vis=False): visualize_with_bounding_boxes(unaugSamp, augSamp) +def test_random_crop_with_bbox_op_coco_c(plot_vis=False): + """ + Prints images and bboxes side by side with and without RandomCropWithBBox Op applied, + Testing with Coco dataset + """ + logger.info("test_random_crop_with_bbox_op_coco_c") + # load dataset + dataCoco1 = ds.CocoDataset(DATA_DIR_COCO[0], annotation_file=DATA_DIR_COCO[1], task="Detection", + decode=True, shuffle=False) + + dataCoco2 = ds.CocoDataset(DATA_DIR_COCO[0], annotation_file=DATA_DIR_COCO[1], task="Detection", + decode=True, shuffle=False) + + test_op = c_vision.RandomCropWithBBox([512, 512], [200, 200, 200, 200]) + + dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + columns_order=["image", "bbox"], + operations=[test_op]) + + unaugSamp, augSamp = [], [] + + for unAug, Aug in zip(dataCoco1.create_dict_iterator(), dataCoco2.create_dict_iterator()): + unaugSamp.append(unAug) + augSamp.append(Aug) + + if plot_vis: + visualize_with_bounding_boxes(unaugSamp, augSamp, "bbox") + + def test_random_crop_with_bbox_op2_c(plot_vis=False): """ - Prints images and bboxes side by side with and without RandomCropWithBBox Op applied, - with md5 check, expected to pass + Prints images and bboxes side by side with and without RandomCropWithBBox Op applied, + with md5 check, expected to pass """ logger.info("test_random_crop_with_bbox_op2_c") original_seed = config_get_set_seed(593447) original_num_parallel_workers = config_get_set_num_parallel_workers(1) # Load dataset - dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", 
mode="train", decode=True, shuffle=False) + dataVoc1 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) # define test OP with values to match existing Op unit - test test_op = c_vision.RandomCropWithBBox(512, [200, 200, 200, 200], fill_value=(255, 255, 255)) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], - operations=[test_op]) # Add column for "annotation" + operations=[test_op]) filename = "random_crop_with_bbox_01_c_result.npz" save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) @@ -132,29 +132,23 @@ def test_random_crop_with_bbox_op2_c(plot_vis=False): def test_random_crop_with_bbox_op3_c(plot_vis=False): """ - Prints images and bboxes side by side with and without RandomCropWithBBox Op applied, - with Padding Mode explicitly passed + Prints images and bboxes side by side with and without RandomCropWithBBox Op applied, + with Padding Mode explicitly passed """ logger.info("test_random_crop_with_bbox_op3_c") # Load dataset - dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc1 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) # define test OP with values to match existing Op unit - test test_op = c_vision.RandomCropWithBBox(512, [200, 200, 200, 200], padding_mode=mode.Border.EDGE) 
- dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], - operations=[test_op]) # Add column for "annotation" + operations=[test_op]) unaugSamp, augSamp = [], [] @@ -168,25 +162,18 @@ def test_random_crop_with_bbox_op3_c(plot_vis=False): def test_random_crop_with_bbox_op_edge_c(plot_vis=False): """ - Prints images and bboxes side by side with and without RandomCropWithBBox Op applied, - applied on dynamically generated edge case, expected to pass + Prints images and bboxes side by side with and without RandomCropWithBBox Op applied, + applied on dynamically generated edge case, expected to pass """ logger.info("test_random_crop_with_bbox_op_edge_c") # Load dataset - dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc1 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) # define test OP with values to match existing Op unit - test test_op = c_vision.RandomCropWithBBox(512, [200, 200, 200, 200], padding_mode=mode.Border.EDGE) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - # maps to convert data into valid edge case data dataVoc1 = dataVoc1.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -216,16 +203,12 @@ def 
test_random_crop_with_bbox_op_invalid_c(): logger.info("test_random_crop_with_bbox_op_invalid_c") # Load dataset - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) try: # define test OP with values to match existing Op unit - test test_op = c_vision.RandomCropWithBBox([512, 512, 375]) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -246,18 +229,19 @@ def test_random_crop_with_bbox_op_bad_c(): logger.info("test_random_crop_with_bbox_op_bad_c") test_op = c_vision.RandomCropWithBBox([512, 512], [200, 200, 200, 200]) - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) check_bad_bbox(data_voc2, test_op, InvalidBBoxType.WidthOverflow, "bounding boxes is out of bounds of the image") - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) check_bad_bbox(data_voc2, test_op, InvalidBBoxType.HeightOverflow, "bounding boxes is out of bounds of the image") - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) check_bad_bbox(data_voc2, test_op, InvalidBBoxType.NegativeXY, "min_x") - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) check_bad_bbox(data_voc2, test_op, 
InvalidBBoxType.WrongShape, "4 features") if __name__ == "__main__": test_random_crop_with_bbox_op_c(plot_vis=True) + test_random_crop_with_bbox_op_coco_c(plot_vis=True) test_random_crop_with_bbox_op2_c(plot_vis=True) test_random_crop_with_bbox_op3_c(plot_vis=True) test_random_crop_with_bbox_op_edge_c(plot_vis=True) diff --git a/tests/ut/python/dataset/test_random_dataset.py b/tests/ut/python/dataset/test_random_dataset.py index 4d50be254c2..56a2a931134 100644 --- a/tests/ut/python/dataset/test_random_dataset.py +++ b/tests/ut/python/dataset/test_random_dataset.py @@ -16,17 +16,16 @@ import mindspore.common.dtype as mstype import mindspore.dataset as ds from mindspore import log as logger - # just a basic test with parallel random data op def test_randomdataset_basic1(): - logger.info("Test randomdataset basic") + logger.info("Test randomdataset basic 1") schema = ds.Schema() schema.add_column('image', de_type=mstype.uint8, shape=[2]) schema.add_column('label', de_type=mstype.uint8, shape=[1]) # apply dataset operations - ds1 = ds.RandomDataset(schema=schema, num_samples=50, num_parallel_workers=4) + ds1 = ds.RandomDataset(schema=schema, total_rows=50, num_parallel_workers=4) ds1 = ds1.repeat(4) num_iter = 0 @@ -36,8 +35,9 @@ def test_randomdataset_basic1(): logger.info("{} label: {}".format(num_iter, data["label"])) num_iter += 1 - logger.info("Number of data in ds1: ", num_iter) + logger.info("Number of data in ds1: {}".format(num_iter)) assert num_iter == 200 + logger.info("Test randomdataset basic 1 complete") # Another simple test @@ -49,10 +49,8 @@ def test_randomdataset_basic2(): shape=[640, 480, 3]) # 921600 bytes (a bit less than 1 MB per image) schema.add_column('label', de_type=mstype.uint8, shape=[1]) - # Make up about 10 samples - ds1 = ds.RandomDataset(schema=schema, num_samples=10, num_parallel_workers=1) - - # cache size allows for about 4 images since each image just a bit less than 1MB, after that we will have to spill + # Make up 10 rows + ds1 = 
ds.RandomDataset(schema=schema, total_rows=10, num_parallel_workers=1) ds1 = ds1.repeat(4) num_iter = 0 @@ -62,11 +60,31 @@ def test_randomdataset_basic2(): logger.info("printing the label: {}".format(data["label"])) num_iter += 1 - logger.info("Number of data in ds1: ", num_iter) + logger.info("Number of data in ds1: {}".format(num_iter)) assert num_iter == 40 + logger.info("Test randomdataset basic 2 complete") +# Another simple test +def test_randomdataset_basic3(): + logger.info("Test randomdataset basic 3") + + # Make up 10 samples, but here even the schema is randomly created + # The columns are named like this "c0", "c1", "c2" etc + # But, we will use a tuple iterator instead of dict iterator so the column names + # are not needed to iterate + ds1 = ds.RandomDataset(total_rows=10, num_parallel_workers=1) + ds1 = ds1.repeat(2) + + num_iter = 0 + for _ in ds1.create_tuple_iterator(): + num_iter += 1 + + logger.info("Number of data in ds1: {}".format(num_iter)) + assert num_iter == 20 + logger.info("Test randomdataset basic 3 Complete") + if __name__ == '__main__': test_randomdataset_basic1() test_randomdataset_basic2() - logger.info('test_randomdataset_basic Ended.\n') + test_randomdataset_basic3() diff --git a/tests/ut/python/dataset/test_random_grayscale.py b/tests/ut/python/dataset/test_random_grayscale.py index 83514a55f6f..4cb25c3a3a9 100644 --- a/tests/ut/python/dataset/test_random_grayscale.py +++ b/tests/ut/python/dataset/test_random_grayscale.py @@ -179,7 +179,7 @@ def test_random_grayscale_invalid_param(): data = data.map(input_columns=["image"], operations=transform()) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "Input is not within the required range" in str(e) + assert "Input prob is not within the required interval of (0.0 to 1.0)." 
in str(e) if __name__ == "__main__": test_random_grayscale_valid_prob(True) diff --git a/tests/ut/python/dataset/test_random_horizontal_flip.py b/tests/ut/python/dataset/test_random_horizontal_flip.py index 1272148e4fc..ef4f5b8eb6f 100644 --- a/tests/ut/python/dataset/test_random_horizontal_flip.py +++ b/tests/ut/python/dataset/test_random_horizontal_flip.py @@ -141,7 +141,7 @@ def test_random_horizontal_invalid_prob_c(): data = data.map(input_columns=["image"], operations=random_horizontal_op) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "Input is not" in str(e) + assert "Input prob is not within the required interval of (0.0 to 1.0)." in str(e) def test_random_horizontal_invalid_prob_py(): @@ -164,7 +164,7 @@ def test_random_horizontal_invalid_prob_py(): data = data.map(input_columns=["image"], operations=transform()) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "Input is not" in str(e) + assert "Input prob is not within the required interval of (0.0 to 1.0)." in str(e) def test_random_horizontal_comp(plot=False): diff --git a/tests/ut/python/dataset/test_random_horizontal_flip_with_bbox.py b/tests/ut/python/dataset/test_random_horizontal_flip_with_bbox.py index 94ab843ce18..4fd51a7a035 100644 --- a/tests/ut/python/dataset/test_random_horizontal_flip_with_bbox.py +++ b/tests/ut/python/dataset/test_random_horizontal_flip_with_bbox.py @@ -24,33 +24,15 @@ from util import visualize_with_bounding_boxes, InvalidBBoxType, check_bad_bbox, GENERATE_GOLDEN = False +# updated VOC dataset with correct annotations DATA_DIR = "../data/dataset/testVOC2012_2" - - -def fix_annotate(bboxes): - """ - Fix annotations to format followed by mindspore. 
- :param bboxes: in [label, x_min, y_min, w, h, truncate, difficult] format - :return: annotation in [x_min, y_min, w, h, label, truncate, difficult] format - """ - for bbox in bboxes: - if bbox.size == 7: - tmp = bbox[0] - bbox[0] = bbox[1] - bbox[1] = bbox[2] - bbox[2] = bbox[3] - bbox[3] = bbox[4] - bbox[4] = tmp - else: - print("ERROR: Invalid Bounding Box size provided") - break - return bboxes +DATA_DIR_2 = ["../data/dataset/testCOCO/train/", + "../data/dataset/testCOCO/annotations/train.json"] # DATA_DIR, ANNOTATION_DIR def test_random_horizontal_flip_with_bbox_op_c(plot_vis=False): """ - Prints images side by side with and without Aug applied + bboxes to - compare and test + Prints images and bboxes side by side with and without RandomHorizontalFlipWithBBox Op applied """ logger.info("test_random_horizontal_flip_with_bbox_op_c") @@ -63,14 +45,6 @@ def test_random_horizontal_flip_with_bbox_op_c(plot_vis=False): test_op = c_vision.RandomHorizontalFlipWithBBox(1) - # maps to fix annotations to minddata standard - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], @@ -86,7 +60,37 @@ def test_random_horizontal_flip_with_bbox_op_c(plot_vis=False): visualize_with_bounding_boxes(unaugSamp, augSamp) -def test_random_horizontal_bbox_with_bbox_valid_rand_c(plot_vis=False): +def test_random_horizontal_flip_with_bbox_op_coco_c(plot_vis=False): + """ + Prints images and bboxes side by side with and without RandomHorizontalFlipWithBBox Op applied, + Testing with COCO dataset + """ + logger.info("test_random_horizontal_flip_with_bbox_op_coco_c") + + dataCoco1 = ds.CocoDataset(DATA_DIR_2[0], annotation_file=DATA_DIR_2[1], 
task="Detection", + decode=True, shuffle=False) + + dataCoco2 = ds.CocoDataset(DATA_DIR_2[0], annotation_file=DATA_DIR_2[1], task="Detection", + decode=True, shuffle=False) + + test_op = c_vision.RandomHorizontalFlipWithBBox(1) + + dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + columns_order=["image", "bbox"], + operations=[test_op]) + + unaugSamp, augSamp = [], [] + + for unAug, Aug in zip(dataCoco1.create_dict_iterator(), dataCoco2.create_dict_iterator()): + unaugSamp.append(unAug) + augSamp.append(Aug) + + if plot_vis: + visualize_with_bounding_boxes(unaugSamp, augSamp, "bbox") + + +def test_random_horizontal_flip_with_bbox_valid_rand_c(plot_vis=False): """ Uses a valid non-default input, expect to pass Prints images side by side with and without Aug applied + bboxes to @@ -106,13 +110,6 @@ def test_random_horizontal_bbox_with_bbox_valid_rand_c(plot_vis=False): test_op = c_vision.RandomHorizontalFlipWithBBox(0.6) - # maps to fix annotations to minddata standard - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -148,25 +145,18 @@ def test_random_horizontal_flip_with_bbox_valid_edge_c(plot_vis=False): test_op = c_vision.RandomHorizontalFlipWithBBox(1) - # maps to fix annotations to minddata standard - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops # Add column for "annotation" dataVoc1 = dataVoc1.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], 
operations=lambda img, bbox: - (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.uint32))) + (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32))) dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], operations=lambda img, bbox: - (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.uint32))) + (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32))) dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], @@ -193,9 +183,6 @@ def test_random_horizontal_flip_with_bbox_invalid_prob_c(): try: # Note: Valid range of prob should be [0.0, 1.0] test_op = c_vision.RandomHorizontalFlipWithBBox(1.5) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -203,7 +190,7 @@ def test_random_horizontal_flip_with_bbox_invalid_prob_c(): operations=[test_op]) # Add column for "annotation" except ValueError as error: logger.info("Got an exception in DE: {}".format(str(error))) - assert "Input is not" in str(error) + assert "Input prob is not within the required interval of (0.0 to 1.0)." 
in str(error) def test_random_horizontal_flip_with_bbox_invalid_bounds_c(): @@ -227,7 +214,8 @@ def test_random_horizontal_flip_with_bbox_invalid_bounds_c(): if __name__ == "__main__": # set to false to not show plots test_random_horizontal_flip_with_bbox_op_c(plot_vis=False) - test_random_horizontal_bbox_with_bbox_valid_rand_c(plot_vis=False) + test_random_horizontal_flip_with_bbox_op_coco_c(plot_vis=False) + test_random_horizontal_flip_with_bbox_valid_rand_c(plot_vis=False) test_random_horizontal_flip_with_bbox_valid_edge_c(plot_vis=False) test_random_horizontal_flip_with_bbox_invalid_prob_c() test_random_horizontal_flip_with_bbox_invalid_bounds_c() diff --git a/tests/ut/python/dataset/test_random_perspective.py b/tests/ut/python/dataset/test_random_perspective.py index 507c9cdb804..992bf2b2227 100644 --- a/tests/ut/python/dataset/test_random_perspective.py +++ b/tests/ut/python/dataset/test_random_perspective.py @@ -67,7 +67,7 @@ def test_random_perspective_op(plot=False): visualize_list(image_original, image_perspective) -def test_random_perspective_md5(): +def skip_test_random_perspective_md5(): """ Test RandomPerspective with md5 comparison """ @@ -107,7 +107,7 @@ def test_random_perspective_exception_distortion_scale_range(): _ = py_vision.RandomPerspective(distortion_scale=1.5) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert str(e) == "Input is not within the required range" + assert str(e) == "Input distortion_scale is not within the required interval of (0.0 to 1.0)." def test_random_perspective_exception_prob_range(): @@ -119,11 +119,11 @@ def test_random_perspective_exception_prob_range(): _ = py_vision.RandomPerspective(prob=1.2) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert str(e) == "Input is not within the required range" + assert str(e) == "Input prob is not within the required interval of (0.0 to 1.0)." 
if __name__ == "__main__": test_random_perspective_op(plot=True) - test_random_perspective_md5() + skip_test_random_perspective_md5() test_random_perspective_exception_distortion_scale_range() test_random_perspective_exception_prob_range() diff --git a/tests/ut/python/dataset/test_random_resize_with_bbox.py b/tests/ut/python/dataset/test_random_resize_with_bbox.py index 4aadf9ef011..94f9d12427b 100644 --- a/tests/ut/python/dataset/test_random_resize_with_bbox.py +++ b/tests/ut/python/dataset/test_random_resize_with_bbox.py @@ -26,32 +26,18 @@ from util import visualize_with_bounding_boxes, InvalidBBoxType, check_bad_bbox, GENERATE_GOLDEN = False DATA_DIR = "../data/dataset/testVOC2012_2" +DATA_DIR_2 = ["../data/dataset/testCOCO/train/", + "../data/dataset/testCOCO/annotations/train.json"] # DATA_DIR, ANNOTATION_DIR -def fix_annotate(bboxes): +def test_random_resize_with_bbox_op_voc_c(plot_vis=False): """ - Fix annotations to format followed by mindspore. - :param bboxes: in [label, x_min, y_min, w, h, truncate, difficult] format - :return: annotation in [x_min, y_min, w, h, label, truncate, difficult] format + Prints images and bboxes side by side with and without RandomResizeWithBBox Op applied + testing with VOC dataset """ - for (i, box) in enumerate(bboxes): - if box.size == 7: - bboxes[i] = np.roll(box, -1) - else: - print("ERROR: Invalid Bounding Box size provided") - break - return bboxes - - -def test_random_resize_with_bbox_op_rand_c(plot_vis=False): - """ - Prints images and bboxes side by side with and without RandomResizeWithBBox Op applied, - tests with MD5 check, expected to pass - """ - logger.info("test_random_resize_with_bbox_rand_c") - original_seed = config_get_set_seed(1) + logger.info("test_random_resize_with_bbox_op_voc_c") + original_seed = config_get_set_seed(123) original_num_parallel_workers = config_get_set_num_parallel_workers(1) - # Load dataset dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, 
shuffle=False) @@ -59,21 +45,15 @@ def test_random_resize_with_bbox_op_rand_c(plot_vis=False): dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) - test_op = c_vision.RandomResizeWithBBox(200) + test_op = c_vision.RandomResizeWithBBox(100) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], operations=[test_op]) - filename = "random_resize_with_bbox_op_01_c_result.npz" + filename = "random_resize_with_bbox_op_01_c_voc_result.npz" save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) unaugSamp, augSamp = [], [] @@ -90,6 +70,49 @@ def test_random_resize_with_bbox_op_rand_c(plot_vis=False): ds.config.set_num_parallel_workers(original_num_parallel_workers) +def test_random_resize_with_bbox_op_rand_coco_c(plot_vis=False): + """ + Prints images and bboxes side by side with and without RandomResizeWithBBox Op applied, + tests with MD5 check, expected to pass + testing with COCO dataset + """ + logger.info("test_random_resize_with_bbox_op_rand_coco_c") + original_seed = config_get_set_seed(231) + original_num_parallel_workers = config_get_set_num_parallel_workers(1) + + # Load dataset + dataCoco1 = ds.CocoDataset(DATA_DIR_2[0], annotation_file=DATA_DIR_2[1], task="Detection", + decode=True, shuffle=False) + + dataCoco2 = ds.CocoDataset(DATA_DIR_2[0], annotation_file=DATA_DIR_2[1], task="Detection", + decode=True, shuffle=False) + + test_op = c_vision.RandomResizeWithBBox(200) + + # map to apply ops + + dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + columns_order=["image", "bbox"], + operations=[test_op]) + + filename = 
"random_resize_with_bbox_op_01_c_coco_result.npz" + save_and_check_md5(dataCoco2, filename, generate_golden=GENERATE_GOLDEN) + + unaugSamp, augSamp = [], [] + + for unAug, Aug in zip(dataCoco1.create_dict_iterator(), dataCoco2.create_dict_iterator()): + unaugSamp.append(unAug) + augSamp.append(Aug) + + if plot_vis: + visualize_with_bounding_boxes(unaugSamp, augSamp, annot_name="bbox") + + # Restore config setting + ds.config.set_seed(original_seed) + ds.config.set_num_parallel_workers(original_num_parallel_workers) + + def test_random_resize_with_bbox_op_edge_c(plot_vis=False): """ Prints images and bboxes side by side with and without RandomresizeWithBBox Op applied, @@ -105,13 +128,6 @@ def test_random_resize_with_bbox_op_edge_c(plot_vis=False): test_op = c_vision.RandomResizeWithBBox(500) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - # maps to convert data into valid edge case data dataVoc1 = dataVoc1.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -147,7 +163,7 @@ def test_random_resize_with_bbox_op_invalid_c(): except ValueError as err: logger.info("Got an exception in DE: {}".format(str(err))) - assert "Input is not" in str(err) + assert "Input is not within the required interval of (1 to 16777216)." in str(err) try: # one of the size values is zero @@ -155,7 +171,7 @@ def test_random_resize_with_bbox_op_invalid_c(): except ValueError as err: logger.info("Got an exception in DE: {}".format(str(err))) - assert "Input is not" in str(err) + assert "Input size at dim 0 is not within the required interval of (1 to 2147483647)." 
in str(err) try: # negative value for resize @@ -163,7 +179,7 @@ def test_random_resize_with_bbox_op_invalid_c(): except ValueError as err: logger.info("Got an exception in DE: {}".format(str(err))) - assert "Input is not" in str(err) + assert "Input is not within the required interval of (1 to 16777216)." in str(err) try: # invalid input shape @@ -192,7 +208,8 @@ def test_random_resize_with_bbox_op_bad_c(): if __name__ == "__main__": - test_random_resize_with_bbox_op_rand_c(plot_vis=False) + test_random_resize_with_bbox_op_voc_c(plot_vis=False) + test_random_resize_with_bbox_op_rand_coco_c(plot_vis=False) test_random_resize_with_bbox_op_edge_c(plot_vis=False) test_random_resize_with_bbox_op_invalid_c() test_random_resize_with_bbox_op_bad_c() diff --git a/tests/ut/python/dataset/test_random_sharpness.py b/tests/ut/python/dataset/test_random_sharpness.py index d8207ff099b..22e5c66f1a1 100644 --- a/tests/ut/python/dataset/test_random_sharpness.py +++ b/tests/ut/python/dataset/test_random_sharpness.py @@ -97,7 +97,7 @@ def test_random_sharpness_md5(): # define map operations transforms = [ F.Decode(), - F.RandomSharpness((0.5, 1.5)), + F.RandomSharpness((0.1, 1.9)), F.ToTensor() ] transform = F.ComposeOp(transforms) diff --git a/tests/ut/python/dataset/test_random_vertical_flip.py b/tests/ut/python/dataset/test_random_vertical_flip.py index 2fc9b127745..a3d02959fdd 100644 --- a/tests/ut/python/dataset/test_random_vertical_flip.py +++ b/tests/ut/python/dataset/test_random_vertical_flip.py @@ -141,7 +141,7 @@ def test_random_vertical_invalid_prob_c(): data = data.map(input_columns=["image"], operations=random_horizontal_op) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "Input is not" in str(e) + assert 'Input prob is not within the required interval of (0.0 to 1.0).' 
in str(e) def test_random_vertical_invalid_prob_py(): @@ -163,7 +163,7 @@ def test_random_vertical_invalid_prob_py(): data = data.map(input_columns=["image"], operations=transform()) except ValueError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "Input is not" in str(e) + assert 'Input prob is not within the required interval of (0.0 to 1.0).' in str(e) def test_random_vertical_comp(plot=False): diff --git a/tests/ut/python/dataset/test_random_vertical_flip_with_bbox.py b/tests/ut/python/dataset/test_random_vertical_flip_with_bbox.py index f746bd50b01..490dc3e419b 100644 --- a/tests/ut/python/dataset/test_random_vertical_flip_with_bbox.py +++ b/tests/ut/python/dataset/test_random_vertical_flip_with_bbox.py @@ -25,50 +25,26 @@ from util import visualize_with_bounding_boxes, InvalidBBoxType, check_bad_bbox, GENERATE_GOLDEN = False -# updated VOC dataset with correct annotations -DATA_DIR = "../data/dataset/testVOC2012_2" - - -def fix_annotate(bboxes): - """ - Fix annotations to format followed by mindspore. 
- :param bboxes: in [label, x_min, y_min, w, h, truncate, difficult] format - :return: annotation in [x_min, y_min, w, h, label, truncate, difficult] format - """ - for bbox in bboxes: - if bbox.size == 7: - tmp = bbox[0] - bbox[0] = bbox[1] - bbox[1] = bbox[2] - bbox[2] = bbox[3] - bbox[3] = bbox[4] - bbox[4] = tmp - else: - print("ERROR: Invalid Bounding Box size provided") - break - return bboxes +# Updated VOC dataset with correct annotations - DATA_DIR +DATA_DIR_VOC = "../data/dataset/testVOC2012_2" +# COCO dataset - DATA_DIR, ANNOTATION_DIR +DATA_DIR_COCO = ["../data/dataset/testCOCO/train/", "../data/dataset/testCOCO/annotations/train.json"] def test_random_vertical_flip_with_bbox_op_c(plot_vis=False): """ - Prints images and bboxes side by side with and without RandomVerticalFlipWithBBox Op applied + Prints images and bboxes side by side with and without RandomVerticalFlipWithBBox Op applied """ logger.info("test_random_vertical_flip_with_bbox_op_c") # Load dataset - dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", + dataVoc1 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) test_op = c_vision.RandomVerticalFlipWithBBox(1) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -84,31 +60,56 @@ def test_random_vertical_flip_with_bbox_op_c(plot_vis=False): if plot_vis: visualize_with_bounding_boxes(unaugSamp, augSamp) +def test_random_vertical_flip_with_bbox_op_coco_c(plot_vis=False): + """ + Prints images and bboxes side by side with and 
without RandomVerticalFlipWithBBox Op applied, + Testing with Coco dataset + """ + logger.info("test_random_vertical_flip_with_bbox_op_coco_c") + # load dataset + dataCoco1 = ds.CocoDataset(DATA_DIR_COCO[0], annotation_file=DATA_DIR_COCO[1], task="Detection", + decode=True, shuffle=False) + + dataCoco2 = ds.CocoDataset(DATA_DIR_COCO[0], annotation_file=DATA_DIR_COCO[1], task="Detection", + decode=True, shuffle=False) + + test_op = c_vision.RandomVerticalFlipWithBBox(1) + + dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + columns_order=["image", "bbox"], + operations=[test_op]) + + test_op = c_vision.RandomVerticalFlipWithBBox(1) + + unaugSamp, augSamp = [], [] + + for unAug, Aug in zip(dataCoco1.create_dict_iterator(), dataCoco2.create_dict_iterator()): + unaugSamp.append(unAug) + augSamp.append(Aug) + + if plot_vis: + visualize_with_bounding_boxes(unaugSamp, augSamp, "bbox") + def test_random_vertical_flip_with_bbox_op_rand_c(plot_vis=False): """ - Prints images and bboxes side by side with and without RandomVerticalFlipWithBBox Op applied, - tests with MD5 check, expected to pass + Prints images and bboxes side by side with and without RandomVerticalFlipWithBBox Op applied, + tests with MD5 check, expected to pass """ logger.info("test_random_vertical_flip_with_bbox_op_rand_c") original_seed = config_get_set_seed(29847) original_num_parallel_workers = config_get_set_num_parallel_workers(1) # Load dataset - dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", + dataVoc1 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) test_op = c_vision.RandomVerticalFlipWithBBox(0.8) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = 
dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -134,25 +135,18 @@ def test_random_vertical_flip_with_bbox_op_rand_c(plot_vis=False): def test_random_vertical_flip_with_bbox_op_edge_c(plot_vis=False): """ - Prints images and bboxes side by side with and without RandomVerticalFlipWithBBox Op applied, + Prints images and bboxes side by side with and without RandomVerticalFlipWithBBox Op applied, applied on dynamically generated edge case, expected to pass """ logger.info("test_random_vertical_flip_with_bbox_op_edge_c") - dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", + dataVoc1 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", + dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) test_op = c_vision.RandomVerticalFlipWithBBox(1) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - # maps to convert data into valid edge case data dataVoc1 = dataVoc1.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -177,17 +171,15 @@ def test_random_vertical_flip_with_bbox_op_edge_c(plot_vis=False): def test_random_vertical_flip_with_bbox_op_invalid_c(): """ - Test RandomVerticalFlipWithBBox Op on invalid constructor parameters, expected to raise ValueError + Test RandomVerticalFlipWithBBox Op on invalid constructor parameters, expected to raise ValueError """ logger.info("test_random_vertical_flip_with_bbox_op_invalid_c") - dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", + dataVoc2 = 
ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) try: test_op = c_vision.RandomVerticalFlipWithBBox(2) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) + # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -199,7 +191,7 @@ def test_random_vertical_flip_with_bbox_op_invalid_c(): except ValueError as err: logger.info("Got an exception in DE: {}".format(str(err))) - assert "Input is not" in str(err) + assert "Input prob is not within the required interval of (0.0 to 1.0)." in str(err) def test_random_vertical_flip_with_bbox_op_bad_c(): @@ -209,18 +201,19 @@ def test_random_vertical_flip_with_bbox_op_bad_c(): logger.info("test_random_vertical_flip_with_bbox_op_bad_c") test_op = c_vision.RandomVerticalFlipWithBBox(1) - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) check_bad_bbox(data_voc2, test_op, InvalidBBoxType.WidthOverflow, "bounding boxes is out of bounds of the image") - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) check_bad_bbox(data_voc2, test_op, InvalidBBoxType.HeightOverflow, "bounding boxes is out of bounds of the image") - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False) check_bad_bbox(data_voc2, test_op, InvalidBBoxType.NegativeXY, "min_x") - data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, 
shuffle=False) check_bad_bbox(data_voc2, test_op, InvalidBBoxType.WrongShape, "4 features") if __name__ == "__main__": test_random_vertical_flip_with_bbox_op_c(plot_vis=True) + test_random_vertical_flip_with_bbox_op_coco_c(plot_vis=True) test_random_vertical_flip_with_bbox_op_rand_c(plot_vis=True) test_random_vertical_flip_with_bbox_op_edge_c(plot_vis=True) test_random_vertical_flip_with_bbox_op_invalid_c() diff --git a/tests/ut/python/dataset/test_repeat.py b/tests/ut/python/dataset/test_repeat.py index 4bdde7beeb9..ca4702ff8cd 100644 --- a/tests/ut/python/dataset/test_repeat.py +++ b/tests/ut/python/dataset/test_repeat.py @@ -12,25 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== +""" +Test Repeat Op +""" import numpy as np -from util import save_and_check import mindspore.dataset as ds import mindspore.dataset.transforms.vision.c_transforms as vision from mindspore import log as logger +from util import save_and_check_dict DATA_DIR_TF = ["../data/dataset/testTFTestAllTypes/test.data"] SCHEMA_DIR_TF = "../data/dataset/testTFTestAllTypes/datasetSchema.json" -COLUMNS_TF = ["col_1d", "col_2d", "col_3d", "col_binary", "col_float", - "col_sint16", "col_sint32", "col_sint64"] -GENERATE_GOLDEN = False - -IMG_DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] -IMG_SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" DATA_DIR_TF2 = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] SCHEMA_DIR_TF2 = "../data/dataset/test_tf_file_3_images/datasetSchema.json" +GENERATE_GOLDEN = False + def test_tf_repeat_01(): """ @@ -39,14 +38,13 @@ def test_tf_repeat_01(): logger.info("Test Simple Repeat") # define parameters repeat_count = 2 - parameters = {"params": {'repeat_count': repeat_count}} # apply dataset operations data1 = ds.TFRecordDataset(DATA_DIR_TF, SCHEMA_DIR_TF, 
shuffle=False) data1 = data1.repeat(repeat_count) filename = "repeat_result.npz" - save_and_check(data1, parameters, filename, generate_golden=GENERATE_GOLDEN) + save_and_check_dict(data1, filename, generate_golden=GENERATE_GOLDEN) def test_tf_repeat_02(): @@ -99,14 +97,13 @@ def test_tf_repeat_04(): logger.info("Test Simple Repeat Column List") # define parameters repeat_count = 2 - parameters = {"params": {'repeat_count': repeat_count}} columns_list = ["col_sint64", "col_sint32"] # apply dataset operations data1 = ds.TFRecordDataset(DATA_DIR_TF, SCHEMA_DIR_TF, columns_list=columns_list, shuffle=False) data1 = data1.repeat(repeat_count) filename = "repeat_list_result.npz" - save_and_check(data1, parameters, filename, generate_golden=GENERATE_GOLDEN) + save_and_check_dict(data1, filename, generate_golden=GENERATE_GOLDEN) def generator(): @@ -115,6 +112,7 @@ def generator(): def test_nested_repeat1(): + logger.info("test_nested_repeat1") data = ds.GeneratorDataset(generator, ["data"]) data = data.repeat(2) data = data.repeat(3) @@ -126,6 +124,7 @@ def test_nested_repeat1(): def test_nested_repeat2(): + logger.info("test_nested_repeat2") data = ds.GeneratorDataset(generator, ["data"]) data = data.repeat(1) data = data.repeat(1) @@ -137,6 +136,7 @@ def test_nested_repeat2(): def test_nested_repeat3(): + logger.info("test_nested_repeat3") data = ds.GeneratorDataset(generator, ["data"]) data = data.repeat(1) data = data.repeat(2) @@ -148,6 +148,7 @@ def test_nested_repeat3(): def test_nested_repeat4(): + logger.info("test_nested_repeat4") data = ds.GeneratorDataset(generator, ["data"]) data = data.repeat(2) data = data.repeat(1) @@ -159,6 +160,7 @@ def test_nested_repeat4(): def test_nested_repeat5(): + logger.info("test_nested_repeat5") data = ds.GeneratorDataset(generator, ["data"]) data = data.batch(3) data = data.repeat(2) @@ -171,6 +173,7 @@ def test_nested_repeat5(): def test_nested_repeat6(): + logger.info("test_nested_repeat6") data = 
ds.GeneratorDataset(generator, ["data"]) data = data.repeat(2) data = data.batch(3) @@ -183,6 +186,7 @@ def test_nested_repeat6(): def test_nested_repeat7(): + logger.info("test_nested_repeat7") data = ds.GeneratorDataset(generator, ["data"]) data = data.repeat(2) data = data.repeat(3) @@ -195,6 +199,7 @@ def test_nested_repeat7(): def test_nested_repeat8(): + logger.info("test_nested_repeat8") data = ds.GeneratorDataset(generator, ["data"]) data = data.batch(2, drop_remainder=False) data = data.repeat(2) @@ -210,6 +215,7 @@ def test_nested_repeat8(): def test_nested_repeat9(): + logger.info("test_nested_repeat9") data = ds.GeneratorDataset(generator, ["data"]) data = data.repeat() data = data.repeat(3) @@ -221,6 +227,7 @@ def test_nested_repeat9(): def test_nested_repeat10(): + logger.info("test_nested_repeat10") data = ds.GeneratorDataset(generator, ["data"]) data = data.repeat(3) data = data.repeat() @@ -232,6 +239,7 @@ def test_nested_repeat10(): def test_nested_repeat11(): + logger.info("test_nested_repeat11") data = ds.GeneratorDataset(generator, ["data"]) data = data.repeat(2) data = data.repeat(3) diff --git a/tests/ut/python/dataset/test_resize_with_bbox.py b/tests/ut/python/dataset/test_resize_with_bbox.py index 06f3937958a..3bb731ee970 100644 --- a/tests/ut/python/dataset/test_resize_with_bbox.py +++ b/tests/ut/python/dataset/test_resize_with_bbox.py @@ -26,29 +26,16 @@ from util import visualize_with_bounding_boxes, InvalidBBoxType, check_bad_bbox, GENERATE_GOLDEN = False DATA_DIR = "../data/dataset/testVOC2012_2" +DATA_DIR_2 = ["../data/dataset/testCOCO/train/", + "../data/dataset/testCOCO/annotations/train.json"] # DATA_DIR, ANNOTATION_DIR -def fix_annotate(bboxes): +def test_resize_with_bbox_op_voc_c(plot_vis=False): """ - Fix annotations to format followed by mindspore. 
- :param bboxes: in [label, x_min, y_min, w, h, truncate, difficult] format - :return: annotation in [x_min, y_min, w, h, label, truncate, difficult] format + Prints images and bboxes side by side with and without ResizeWithBBox Op applied + testing with VOC dataset """ - for (i, box) in enumerate(bboxes): - if box.size == 7: - bboxes[i] = np.roll(box, -1) - else: - print("ERROR: Invalid Bounding Box size provided") - break - return bboxes - - -def test_resize_with_bbox_op_c(plot_vis=False): - """ - Prints images and bboxes side by side with and without ResizeWithBBox Op applied, - tests with MD5 check, expected to pass - """ - logger.info("test_resize_with_bbox_op_c") + logger.info("test_resize_with_bbox_op_voc_c") # Load dataset dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", @@ -57,21 +44,15 @@ def test_resize_with_bbox_op_c(plot_vis=False): dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) - test_op = c_vision.ResizeWithBBox(200) + test_op = c_vision.ResizeWithBBox(100) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], operations=[test_op]) - filename = "resize_with_bbox_op_01_c_result.npz" + filename = "resize_with_bbox_op_01_c_voc_result.npz" save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) unaugSamp, augSamp = [], [] @@ -84,6 +65,43 @@ def test_resize_with_bbox_op_c(plot_vis=False): visualize_with_bounding_boxes(unaugSamp, augSamp) +def test_resize_with_bbox_op_coco_c(plot_vis=False): + """ + Prints images and bboxes side by side with and without ResizeWithBBox Op applied, + tests with MD5 check, expected to pass + Testing with COCO 
dataset + """ + logger.info("test_resize_with_bbox_op_coco_c") + + # Load dataset + dataCOCO1 = ds.CocoDataset(DATA_DIR_2[0], annotation_file=DATA_DIR_2[1], task="Detection", + decode=True, shuffle=False) + + dataCOCO2 = ds.CocoDataset(DATA_DIR_2[0], annotation_file=DATA_DIR_2[1], task="Detection", + decode=True, shuffle=False) + + test_op = c_vision.ResizeWithBBox(200) + + # map to apply ops + + dataCOCO2 = dataCOCO2.map(input_columns=["image", "bbox"], + output_columns=["image", "bbox"], + columns_order=["image", "bbox"], + operations=[test_op]) + + filename = "resize_with_bbox_op_01_c_coco_result.npz" + save_and_check_md5(dataCOCO2, filename, generate_golden=GENERATE_GOLDEN) + + unaugSamp, augSamp = [], [] + + for unAug, Aug in zip(dataCOCO1.create_dict_iterator(), dataCOCO2.create_dict_iterator()): + unaugSamp.append(unAug) + augSamp.append(Aug) + + if plot_vis: + visualize_with_bounding_boxes(unaugSamp, augSamp, annot_name="bbox") + + def test_resize_with_bbox_op_edge_c(plot_vis=False): """ Prints images and bboxes side by side with and without ResizeWithBBox Op applied, @@ -99,13 +117,6 @@ def test_resize_with_bbox_op_edge_c(plot_vis=False): test_op = c_vision.ResizeWithBBox(500) - dataVoc1 = dataVoc1.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - dataVoc2 = dataVoc2.map(input_columns=["annotation"], - output_columns=["annotation"], - operations=fix_annotate) - # maps to convert data into valid edge case data dataVoc1 = dataVoc1.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], @@ -113,7 +124,6 @@ def test_resize_with_bbox_op_edge_c(plot_vis=False): operations=[lambda img, bboxes: ( img, np.array([[0, 0, img.shape[1], img.shape[0]]]).astype(bboxes.dtype))]) - # Test Op added to list of Operations here dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], columns_order=["image", "annotation"], @@ -140,7 +150,7 @@ def 
test_resize_with_bbox_op_invalid_c(): # invalid interpolation value c_vision.ResizeWithBBox(400, interpolation="invalid") - except ValueError as err: + except TypeError as err: logger.info("Got an exception in DE: {}".format(str(err))) assert "interpolation" in str(err) @@ -163,7 +173,8 @@ def test_resize_with_bbox_op_bad_c(): if __name__ == "__main__": - test_resize_with_bbox_op_c(plot_vis=False) + test_resize_with_bbox_op_voc_c(plot_vis=False) + test_resize_with_bbox_op_coco_c(plot_vis=False) test_resize_with_bbox_op_edge_c(plot_vis=False) test_resize_with_bbox_op_invalid_c() test_resize_with_bbox_op_bad_c() diff --git a/tests/ut/python/dataset/test_shuffle.py b/tests/ut/python/dataset/test_shuffle.py index 56cc65a23b2..460c491ca1b 100644 --- a/tests/ut/python/dataset/test_shuffle.py +++ b/tests/ut/python/dataset/test_shuffle.py @@ -154,7 +154,7 @@ def test_shuffle_exception_01(): except Exception as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "buffer_size" in str(e) + assert "Input buffer_size is not within the required interval of (2 to 2147483647)" in str(e) def test_shuffle_exception_02(): @@ -172,7 +172,7 @@ def test_shuffle_exception_02(): except Exception as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "buffer_size" in str(e) + assert "Input buffer_size is not within the required interval of (2 to 2147483647)" in str(e) def test_shuffle_exception_03(): @@ -190,7 +190,7 @@ def test_shuffle_exception_03(): except Exception as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "buffer_size" in str(e) + assert "Input buffer_size is not within the required interval of (2 to 2147483647)" in str(e) def test_shuffle_exception_05(): diff --git a/tests/ut/python/dataset/test_sync_wait.py b/tests/ut/python/dataset/test_sync_wait.py index a5727a29911..eb2261a5d34 100644 --- a/tests/ut/python/dataset/test_sync_wait.py +++ b/tests/ut/python/dataset/test_sync_wait.py @@ -14,7 +14,7 @@ # 
============================================================================== import numpy as np - +import pytest import mindspore.dataset as ds from mindspore import log as logger @@ -163,7 +163,6 @@ def test_sync_exception_01(): """ logger.info("test_sync_exception_01") shuffle_size = 4 - batch_size = 10 dataset = ds.GeneratorDataset(gen, column_names=["input"]) @@ -171,11 +170,9 @@ def test_sync_exception_01(): dataset = dataset.sync_wait(condition_name="policy", callback=aug.update) dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess]) - try: - dataset = dataset.shuffle(shuffle_size) - except Exception as e: - assert "shuffle" in str(e) - dataset = dataset.batch(batch_size) + with pytest.raises(RuntimeError) as e: + dataset.shuffle(shuffle_size) + assert "No shuffle after sync operators" in str(e.value) def test_sync_exception_02(): @@ -183,7 +180,6 @@ def test_sync_exception_02(): Test sync: with duplicated condition name """ logger.info("test_sync_exception_02") - batch_size = 6 dataset = ds.GeneratorDataset(gen, column_names=["input"]) @@ -192,11 +188,9 @@ def test_sync_exception_02(): dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess]) - try: - dataset = dataset.sync_wait(num_batch=2, condition_name="every batch") - except Exception as e: - assert "name" in str(e) - dataset = dataset.batch(batch_size) + with pytest.raises(RuntimeError) as e: + dataset.sync_wait(num_batch=2, condition_name="every batch") + assert "Condition name is already in use" in str(e.value) def test_sync_exception_03(): @@ -209,12 +203,9 @@ def test_sync_exception_03(): aug = Augment(0) # try to create dataset with batch_size < 0 - try: - dataset = dataset.sync_wait(condition_name="every batch", num_batch=-1, callback=aug.update) - except Exception as e: - assert "num_batch" in str(e) - - dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess]) + with pytest.raises(ValueError) as e: + 
dataset.sync_wait(condition_name="every batch", num_batch=-1, callback=aug.update) + assert "num_batch need to be greater than 0." in str(e.value) def test_sync_exception_04(): @@ -230,14 +221,13 @@ def test_sync_exception_04(): dataset = dataset.sync_wait(condition_name="every batch", callback=aug.update) dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess]) count = 0 - try: + with pytest.raises(RuntimeError) as e: for _ in dataset.create_dict_iterator(): count += 1 data = {"loss": count} - # dataset.disable_sync() dataset.sync_update(condition_name="every batch", num_batch=-1, data=data) - except Exception as e: - assert "batch" in str(e) + assert "Sync_update batch size can only be positive" in str(e.value) + def test_sync_exception_05(): """ @@ -251,15 +241,15 @@ def test_sync_exception_05(): # try to create dataset with batch_size < 0 dataset = dataset.sync_wait(condition_name="every batch", callback=aug.update) dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess]) - try: + with pytest.raises(RuntimeError) as e: for _ in dataset.create_dict_iterator(): dataset.disable_sync() count += 1 data = {"loss": count} dataset.disable_sync() dataset.sync_update(condition_name="every", data=data) - except Exception as e: - assert "name" in str(e) + assert "Condition name not found" in str(e.value) + if __name__ == "__main__": test_simple_sync_wait() diff --git a/tests/ut/python/dataset/test_ten_crop.py b/tests/ut/python/dataset/test_ten_crop.py index 7bffea5cc9d..d196bc05cf5 100644 --- a/tests/ut/python/dataset/test_ten_crop.py +++ b/tests/ut/python/dataset/test_ten_crop.py @@ -62,7 +62,7 @@ def util_test_ten_crop(crop_size, vertical_flip=False, plot=False): logger.info("dtype of image_2: {}".format(image_2.dtype)) if plot: - visualize_list(np.array([image_1]*10), (image_2 * 255).astype(np.uint8).transpose(0, 2, 3, 1)) + visualize_list(np.array([image_1] * 10), (image_2 * 255).astype(np.uint8).transpose(0, 2, 3, 1)) # The output 
data should be of a 4D tensor shape, a stack of 10 images. assert len(image_2.shape) == 4 @@ -144,7 +144,7 @@ def test_ten_crop_invalid_size_error_msg(): vision.TenCrop(0), lambda images: np.stack([vision.ToTensor()(image) for image in images]) # 4D stack of 10 images ] - error_msg = "Input is not within the required range" + error_msg = "Input is not within the required interval of (1 to 16777216)." assert error_msg == str(info.value) with pytest.raises(ValueError) as info: diff --git a/tests/ut/python/dataset/test_text_basic_tokenizer.py b/tests/ut/python/dataset/test_text_basic_tokenizer.py new file mode 100644 index 00000000000..822790fd608 --- /dev/null +++ b/tests/ut/python/dataset/test_text_basic_tokenizer.py @@ -0,0 +1,138 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +""" +Testing BasicTokenizer op in DE +""" +import numpy as np +import mindspore.dataset as ds +from mindspore import log as logger +import mindspore.dataset.text as text + +BASIC_TOKENIZER_FILE = "../data/dataset/testTokenizerData/basic_tokenizer.txt" + +test_paras = [ + dict( + first=1, + last=6, + expected_tokens= + [['Welcome', 'to', 'Beijing', '北', '京', '欢', '迎', '您'], + ['長', '風', '破', '浪', '會', '有', '時', ',', '直', '掛', '雲', '帆', '濟', '滄', '海'], + ['😀', '嘿', '嘿', '😃', '哈', '哈', '😄', '大', '笑', '😁', '嘻', '嘻'], + ['明', '朝', '(', '1368', '—', '1644', '年', ')', '和', '清', '朝', + '(', '1644', '—', '1911', '年', ')', ',', '是', '中', '国', '封', + '建', '王', '朝', '史', '上', '最', '后', '两', '个', '朝', '代'], + ['明', '代', '(', '1368', '-', '1644', ')', 'と', '清', '代', + '(', '1644', '-', '1911', ')', 'は', '、', '中', '国', 'の', '封', + '建', '王', '朝', 'の', '歴', '史', 'における', '最', '後', 'の2つの', '王', '朝', 'でした'], + ['명나라', '(', '1368', '-', '1644', ')', '와', '청나라', '(', '1644', '-', '1911', ')', '는', + '중국', '봉건', '왕조의', '역사에서', '마지막', '두', '왕조였다']], + expected_offsets_start=[[0, 8, 11, 18, 21, 24, 27, 30], + [0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42], + [0, 4, 7, 10, 14, 17, 20, 24, 27, 30, 34, 37], + [0, 3, 6, 9, 13, 16, 20, 23, 26, 29, 32, 35, 38, 42, 45, 49, + 52, 55, 58, 61, 64, 67, 70, 73, 76, 79, 82, 85, 88, 91, 94, 97, 100], + [0, 3, 6, 9, 13, 14, 18, 21, 24, 27, 30, 33, 37, 38, 42, 45, 48, 51, + 54, 57, 60, 63, 66, 69, 72, 75, 78, 81, 93, 96, 99, 109, 112, 115], + [0, 10, 11, 15, 16, 20, 21, 25, 35, 36, 40, 41, 45, 46, 50, 57, 64, 74, 87, 97, 101]], + expected_offsets_limit=[[7, 10, 18, 21, 24, 27, 30, 33], + [3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45], + [4, 7, 10, 14, 17, 20, 24, 27, 30, 34, 37, 40], + [3, 6, 9, 13, 16, 20, 23, 26, 29, 32, 35, 38, 42, 45, 49, 52, 55, 58, + 61, 64, 67, 70, 73, 76, 79, 82, 85, 88, 91, 94, 97, 100, 103], + [3, 6, 9, 13, 14, 18, 21, 24, 27, 
30, 33, 37, 38, 42, 45, 48, 51, 54, + 57, 60, 63, 66, 69, 72, 75, 78, 81, 93, 96, 99, 109, 112, 115, 124], + [9, 11, 15, 16, 20, 21, 24, 34, 36, 40, 41, 45, 46, 49, 56, 63, 73, 86, 96, 100, 113]] + ), + dict( + first=7, + last=7, + expected_tokens=[['this', 'is', 'a', 'funky', 'string']], + expected_offsets_start=[[0, 5, 8, 10, 16]], + expected_offsets_limit=[[4, 7, 9, 15, 22]], + lower_case=True + ), +] + + +def check_basic_tokenizer_default(first, last, expected_tokens, expected_offsets_start, expected_offsets_limit, + lower_case=False, keep_whitespace=False, + normalization_form=text.utils.NormalizeForm.NONE, preserve_unused_token=False): + dataset = ds.TextFileDataset(BASIC_TOKENIZER_FILE, shuffle=False) + if first > 1: + dataset = dataset.skip(first - 1) + if last >= first: + dataset = dataset.take(last - first + 1) + + basic_tokenizer = text.BasicTokenizer(lower_case=lower_case, + keep_whitespace=keep_whitespace, + normalization_form=normalization_form, + preserve_unused_token=preserve_unused_token) + + dataset = dataset.map(operations=basic_tokenizer) + count = 0 + for i in dataset.create_dict_iterator(): + token = text.to_str(i['text']) + logger.info("Out:", token) + logger.info("Exp:", expected_tokens[count]) + np.testing.assert_array_equal(token, expected_tokens[count]) + count = count + 1 + + +def check_basic_tokenizer_with_offsets(first, last, expected_tokens, expected_offsets_start, expected_offsets_limit, + lower_case=False, keep_whitespace=False, + normalization_form=text.utils.NormalizeForm.NONE, preserve_unused_token=False): + dataset = ds.TextFileDataset(BASIC_TOKENIZER_FILE, shuffle=False) + if first > 1: + dataset = dataset.skip(first - 1) + if last >= first: + dataset = dataset.take(last - first + 1) + + basic_tokenizer = text.BasicTokenizer(lower_case=lower_case, + keep_whitespace=keep_whitespace, + normalization_form=normalization_form, + preserve_unused_token=preserve_unused_token, + with_offsets=True) + + dataset = 
dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'], + columns_order=['token', 'offsets_start', 'offsets_limit'], operations=basic_tokenizer) + count = 0 + for i in dataset.create_dict_iterator(): + token = text.to_str(i['token']) + logger.info("Out:", token) + logger.info("Exp:", expected_tokens[count]) + np.testing.assert_array_equal(token, expected_tokens[count]) + np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count]) + np.testing.assert_array_equal(i['offsets_limit'], expected_offsets_limit[count]) + count = count + 1 + +def test_basic_tokenizer_with_offsets(): + """ + Test BasicTokenizer + """ + for paras in test_paras: + check_basic_tokenizer_with_offsets(**paras) + + +def test_basic_tokenizer_default(): + """ + Test BasicTokenizer + """ + for paras in test_paras: + check_basic_tokenizer_default(**paras) + + +if __name__ == '__main__': + test_basic_tokenizer_default() + test_basic_tokenizer_with_offsets() diff --git a/tests/ut/python/dataset/test_bert_tokenizer.py b/tests/ut/python/dataset/test_text_bert_tokenizer.py similarity index 51% rename from tests/ut/python/dataset/test_bert_tokenizer.py rename to tests/ut/python/dataset/test_text_bert_tokenizer.py index ba487343a03..b29f94eb324 100644 --- a/tests/ut/python/dataset/test_bert_tokenizer.py +++ b/tests/ut/python/dataset/test_text_bert_tokenizer.py @@ -18,7 +18,7 @@ Testing BertTokenizer op in DE import numpy as np import mindspore.dataset as ds from mindspore import log as logger -import mindspore.dataset.text as nlp +import mindspore.dataset.text as text BERT_TOKENIZER_FILE = "../data/dataset/testTokenizerData/bert_tokenizer.txt" @@ -39,6 +39,14 @@ test_paras = [ ['疑', '是', '地', '上', '霜'], ['举', '头', '望', '明', '月'], ['低', '头', '思', '故', '乡']], + expected_offsets_start=[[0, 3, 6, 9, 12], + [0, 3, 6, 9, 12], + [0, 3, 6, 9, 12], + [0, 3, 6, 9, 12]], + expected_offsets_limit=[[3, 6, 9, 12, 15], + [3, 6, 9, 12, 15], + [3, 6, 9, 12, 15], + 
[3, 6, 9, 12, 15]], vocab_list=vocab_bert ), # test english text @@ -46,6 +54,8 @@ test_paras = [ first=5, last=5, expect_str=[['i', 'am', 'mak', '##ing', 'small', 'mistake', '##s', 'during', 'work', '##ing', 'hour', '##s']], + expected_offsets_start=[[0, 2, 5, 8, 12, 18, 25, 27, 34, 38, 42, 46]], + expected_offsets_limit=[[1, 4, 8, 11, 17, 25, 26, 33, 38, 41, 46, 47]], lower_case=True, vocab_list=vocab_bert ), @@ -53,6 +63,8 @@ test_paras = [ first=5, last=5, expect_str=[['I', "am", 'mak', '##ing', 'small', 'mistake', '##s', 'during', 'work', '##ing', 'hour', '##s']], + expected_offsets_start=[[0, 2, 5, 8, 12, 18, 25, 27, 34, 38, 42, 46]], + expected_offsets_limit=[[1, 4, 8, 11, 17, 25, 26, 33, 38, 41, 46, 47]], lower_case=False, vocab_list=vocab_bert ), @@ -63,7 +75,9 @@ test_paras = [ expect_str=[ ['😀', '嘿', '嘿', '😃', '哈', '哈', '😄', '大', '笑', '😁', '嘻', '嘻'], ['繁', '體', '字']], - normalization_form=nlp.utils.NormalizeForm.NFKC, + expected_offsets_start=[[0, 4, 7, 10, 14, 17, 20, 24, 27, 30, 34, 37], [0, 3, 6]], + expected_offsets_limit=[[4, 7, 10, 14, 17, 20, 24, 27, 30, 34, 37, 40], [3, 6, 9]], + normalization_form=text.utils.NormalizeForm.NFKC, vocab_list=vocab_bert ), # test preserved tokens @@ -79,6 +93,8 @@ test_paras = [ ['[unused1]'], ['[unused10]'] ], + expected_offsets_start=[[0, 7], [0, 7], [0, 7], [0, 7], [0, 7], [0], [0]], + expected_offsets_limit=[[6, 12], [6, 12], [6, 12], [6, 12], [6, 13], [9], [10]], lower_case=False, vocab_list=vocab_bert, preserve_unused_token=True, @@ -95,6 +111,8 @@ test_paras = [ ['[unused1]'], ['[unused10]'] ], + expected_offsets_start=[[0, 7], [0, 7], [0, 7], [0, 7], [0, 7], [0], [0]], + expected_offsets_limit=[[6, 12], [6, 12], [6, 12], [6, 12], [6, 13], [9], [10]], lower_case=True, vocab_list=vocab_bert, preserve_unused_token=True, @@ -104,6 +122,8 @@ test_paras = [ first=15, last=15, expect_str=[['12', '+', '/', '-', '28', '=', '40', '/', '-', '16']], + expected_offsets_start=[[0, 2, 3, 4, 5, 7, 8, 10, 11, 12]], + 
expected_offsets_limit=[[2, 3, 4, 5, 7, 8, 10, 11, 12, 14]], preserve_unused_token=True, vocab_list=vocab_bert ), @@ -112,6 +132,8 @@ test_paras = [ first=8, last=8, expect_str=[['[UNK]', ' ', '[CLS]']], + expected_offsets_start=[[0, 6, 7]], + expected_offsets_limit=[[6, 7, 12]], lower_case=False, vocab_list=vocab_bert, preserve_unused_token=True, @@ -121,6 +143,8 @@ test_paras = [ first=8, last=8, expect_str=[['unused', ' ', '[CLS]']], + expected_offsets_start=[[0, 6, 7]], + expected_offsets_limit=[[6, 7, 12]], lower_case=False, vocab_list=vocab_bert, preserve_unused_token=True, @@ -131,6 +155,8 @@ test_paras = [ first=8, last=8, expect_str=[['unused', ' ', '[', 'CLS', ']']], + expected_offsets_start=[[0, 6, 7, 8, 11]], + expected_offsets_limit=[[6, 7, 8, 11, 12]], lower_case=False, vocab_list=vocab_bert, preserve_unused_token=False, @@ -140,20 +166,20 @@ test_paras = [ ] -def check_bert_tokenizer(first, last, expect_str, - vocab_list, - suffix_indicator='##', - max_bytes_per_token=100, unknown_token='[UNK]', - lower_case=False, keep_whitespace=False, - normalization_form=nlp.utils.NormalizeForm.NONE, - preserve_unused_token=False): +def check_bert_tokenizer_default(first, last, expect_str, + expected_offsets_start, expected_offsets_limit, + vocab_list, suffix_indicator='##', + max_bytes_per_token=100, unknown_token='[UNK]', + lower_case=False, keep_whitespace=False, + normalization_form=text.utils.NormalizeForm.NONE, + preserve_unused_token=False): dataset = ds.TextFileDataset(BERT_TOKENIZER_FILE, shuffle=False) if first > 1: dataset = dataset.skip(first - 1) if last >= first: dataset = dataset.take(last - first + 1) - vocab = nlp.Vocab.from_list(vocab_list) - tokenizer_op = nlp.BertTokenizer( + vocab = text.Vocab.from_list(vocab_list) + tokenizer_op = text.BertTokenizer( vocab=vocab, suffix_indicator=suffix_indicator, max_bytes_per_token=max_bytes_per_token, unknown_token=unknown_token, lower_case=lower_case, keep_whitespace=keep_whitespace, @@ -162,20 +188,59 
@@ def check_bert_tokenizer(first, last, expect_str, dataset = dataset.map(operations=tokenizer_op) count = 0 for i in dataset.create_dict_iterator(): - text = nlp.to_str(i['text']) - logger.info("Out:", text) + token = text.to_str(i['text']) + logger.info("Out:", token) logger.info("Exp:", expect_str[count]) - np.testing.assert_array_equal(text, expect_str[count]) + np.testing.assert_array_equal(token, expect_str[count]) count = count + 1 -def test_bert_tokenizer(): +def check_bert_tokenizer_with_offsets(first, last, expect_str, + expected_offsets_start, expected_offsets_limit, + vocab_list, suffix_indicator='##', + max_bytes_per_token=100, unknown_token='[UNK]', + lower_case=False, keep_whitespace=False, + normalization_form=text.utils.NormalizeForm.NONE, + preserve_unused_token=False): + dataset = ds.TextFileDataset(BERT_TOKENIZER_FILE, shuffle=False) + if first > 1: + dataset = dataset.skip(first - 1) + if last >= first: + dataset = dataset.take(last - first + 1) + vocab = text.Vocab.from_list(vocab_list) + tokenizer_op = text.BertTokenizer( + vocab=vocab, suffix_indicator=suffix_indicator, max_bytes_per_token=max_bytes_per_token, + unknown_token=unknown_token, lower_case=lower_case, keep_whitespace=keep_whitespace, + normalization_form=normalization_form, preserve_unused_token=preserve_unused_token, with_offsets=True) + dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'], + columns_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer_op) + count = 0 + for i in dataset.create_dict_iterator(): + token = text.to_str(i['token']) + logger.info("Out:", token) + logger.info("Exp:", expect_str[count]) + np.testing.assert_array_equal(token, expect_str[count]) + np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count]) + np.testing.assert_array_equal(i['offsets_limit'], expected_offsets_limit[count]) + count = count + 1 + + +def test_bert_tokenizer_default(): """ - Test 
WordpieceTokenizer + Test WordpieceTokenizer when with_offsets=False """ for paras in test_paras: - check_bert_tokenizer(**paras) + check_bert_tokenizer_default(**paras) + + +def test_bert_tokenizer_with_offsets(): + """ + Test WordpieceTokenizer when with_offsets=True + """ + for paras in test_paras: + check_bert_tokenizer_with_offsets(**paras) if __name__ == '__main__': - test_bert_tokenizer() + test_bert_tokenizer_default() + test_bert_tokenizer_with_offsets() diff --git a/tests/ut/python/dataset/test_text_jieba_tokenizer.py b/tests/ut/python/dataset/test_text_jieba_tokenizer.py new file mode 100644 index 00000000000..66665b61e69 --- /dev/null +++ b/tests/ut/python/dataset/test_text_jieba_tokenizer.py @@ -0,0 +1,471 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +import numpy as np +import mindspore.dataset as ds +from mindspore.dataset.text import JiebaTokenizer +from mindspore.dataset.text import JiebaMode, to_str + +DATA_FILE = "../data/dataset/testJiebaDataset/3.txt" +DATA_ALL_FILE = "../data/dataset/testJiebaDataset/*" + +HMM_FILE = "../data/dataset/jiebadict/hmm_model.utf8" +MP_FILE = "../data/dataset/jiebadict/jieba.dict.utf8" + + +def test_jieba_1(): + """Test jieba tokenizer with MP mode""" + data = ds.TextFileDataset(DATA_FILE) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) + data = data.map(input_columns=["text"], + operations=jieba_op, num_parallel_workers=1) + expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] + ret = [] + for i in data.create_dict_iterator(): + ret = to_str(i["text"]) + for index, item in enumerate(ret): + assert item == expect[index] + + +def test_jieba_1_1(): + """Test jieba tokenizer with HMM mode""" + data = ds.TextFileDataset(DATA_FILE) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.HMM) + data = data.map(input_columns=["text"], + operations=jieba_op, num_parallel_workers=1) + expect = ['今天', '天气', '太', '好', '了', '我们', '一起', '去', '外面', '玩', '吧'] + for i in data.create_dict_iterator(): + ret = to_str(i["text"]) + for index, item in enumerate(ret): + assert item == expect[index] + + +def test_jieba_1_2(): + """Test jieba tokenizer with HMM MIX""" + data = ds.TextFileDataset(DATA_FILE) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MIX) + data = data.map(input_columns=["text"], + operations=jieba_op, num_parallel_workers=1) + expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] + for i in data.create_dict_iterator(): + ret = to_str(i["text"]) + for index, item in enumerate(ret): + assert item == expect[index] + + +def test_jieba_2(): + """Test add_word""" + DATA_FILE4 = "../data/dataset/testJiebaDataset/4.txt" + data = ds.TextFileDataset(DATA_FILE4) + 
jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) + jieba_op.add_word("男默女泪") + expect = ['男默女泪', '市', '长江大桥'] + data = data.map(input_columns=["text"], + operations=jieba_op, num_parallel_workers=2) + for i in data.create_dict_iterator(): + ret = to_str(i["text"]) + for index, item in enumerate(ret): + assert item == expect[index] + + +def test_jieba_2_1(): + """Test add_word with freq""" + DATA_FILE4 = "../data/dataset/testJiebaDataset/4.txt" + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) + jieba_op.add_word("男默女泪", 10) + data = data.map(input_columns=["text"], + operations=jieba_op, num_parallel_workers=2) + expect = ['男默女泪', '市', '长江大桥'] + for i in data.create_dict_iterator(): + ret = to_str(i["text"]) + for index, item in enumerate(ret): + assert item == expect[index] + + +def test_jieba_2_2(): + """Test add_word with invalid None Input""" + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) + try: + jieba_op.add_word(None) + except ValueError: + pass + + +def test_jieba_2_3(): + """Test add_word with freq, the value of freq affects the result of segmentation""" + DATA_FILE4 = "../data/dataset/testJiebaDataset/6.txt" + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) + jieba_op.add_word("江大桥", 20000) + data = data.map(input_columns=["text"], + operations=jieba_op, num_parallel_workers=2) + expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] + for i in data.create_dict_iterator(): + ret = to_str(i["text"]) + for index, item in enumerate(ret): + assert item == expect[index] + + +def test_jieba_3(): + """Test add_dict with dict""" + DATA_FILE4 = "../data/dataset/testJiebaDataset/4.txt" + user_dict = { + "男默女泪": 10 + } + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) + jieba_op.add_dict(user_dict) + data = data.map(input_columns=["text"], + 
operations=jieba_op, num_parallel_workers=1) + expect = ['男默女泪', '市', '长江大桥'] + for i in data.create_dict_iterator(): + ret = to_str(i["text"]) + for index, item in enumerate(ret): + assert item == expect[index] + + +def test_jieba_3_1(): + """Test add_dict with dict""" + DATA_FILE4 = "../data/dataset/testJiebaDataset/4.txt" + user_dict = { + "男默女泪": 10, + "江大桥": 20000 + } + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) + jieba_op.add_dict(user_dict) + data = data.map(input_columns=["text"], + operations=jieba_op, num_parallel_workers=1) + expect = ['男默女泪', '市长', '江大桥'] + for i in data.create_dict_iterator(): + ret = to_str(i["text"]) + for index, item in enumerate(ret): + assert item == expect[index] + + +def test_jieba_4(): + DATA_FILE4 = "../data/dataset/testJiebaDataset/3.txt" + DICT_FILE = "../data/dataset/testJiebaDataset/user_dict.txt" + + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) + jieba_op.add_dict(DICT_FILE) + data = data.map(input_columns=["text"], + operations=jieba_op, num_parallel_workers=1) + expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] + for i in data.create_dict_iterator(): + ret = to_str(i["text"]) + for index, item in enumerate(ret): + assert item == expect[index] + + +def test_jieba_4_1(): + """Test add dict with invalid file path""" + DICT_FILE = "" + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) + try: + jieba_op.add_dict(DICT_FILE) + except ValueError: + pass + + +def test_jieba_5(): + """Test add dict with file path""" + DATA_FILE4 = "../data/dataset/testJiebaDataset/6.txt" + + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) + jieba_op.add_word("江大桥", 20000) + data = data.map(input_columns=["text"], + operations=jieba_op, num_parallel_workers=1) + expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] + for i in 
data.create_dict_iterator(): + ret = to_str(i["text"]) + for index, item in enumerate(ret): + assert item == expect[index] + + +def test_jieba_with_offsets_1(): + """Test jieba tokenizer with MP mode""" + data = ds.TextFileDataset(DATA_FILE) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) + data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + columns_order=["token", "offsets_start", "offsets_limit"], + operations=jieba_op, num_parallel_workers=1) + expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] + expected_offsets_start = [0, 12, 21, 27, 33, 36, 42] + expected_offsets_limit = [12, 21, 27, 33, 36, 42, 48] + ret = [] + for i in data.create_dict_iterator(): + ret = to_str(i["token"]) + for index, item in enumerate(ret): + assert item == expect[index] + for index, item in enumerate(i["offsets_start"]): + assert item == expected_offsets_start[index] + for index, item in enumerate(i["offsets_limit"]): + assert item == expected_offsets_limit[index] + + +def test_jieba_with_offsets_1_1(): + """Test jieba tokenizer with HMM mode""" + data = ds.TextFileDataset(DATA_FILE) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.HMM, with_offsets=True) + data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + columns_order=["token", "offsets_start", "offsets_limit"], + operations=jieba_op, num_parallel_workers=1) + expect = ['今天', '天气', '太', '好', '了', '我们', '一起', '去', '外面', '玩', '吧'] + expected_offsets_start = [0, 6, 12, 15, 18, 21, 27, 33, 36, 42, 45] + expected_offsets_limit = [6, 12, 15, 18, 21, 27, 33, 36, 42, 45, 48] + for i in data.create_dict_iterator(): + ret = to_str(i["token"]) + for index, item in enumerate(ret): + assert item == expect[index] + for index, item in enumerate(i["offsets_start"]): + assert item == expected_offsets_start[index] + for index, item in enumerate(i["offsets_limit"]): + assert item == 
expected_offsets_limit[index] + + +def test_jieba_with_offsets_1_2(): + """Test jieba tokenizer with HMM MIX""" + data = ds.TextFileDataset(DATA_FILE) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MIX, with_offsets=True) + data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + columns_order=["token", "offsets_start", "offsets_limit"], + operations=jieba_op, num_parallel_workers=1) + expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] + expected_offsets_start = [0, 12, 21, 27, 33, 36, 42] + expected_offsets_limit = [12, 21, 27, 33, 36, 42, 48] + for i in data.create_dict_iterator(): + ret = to_str(i["token"]) + for index, item in enumerate(ret): + assert item == expect[index] + for index, item in enumerate(i["offsets_start"]): + assert item == expected_offsets_start[index] + for index, item in enumerate(i["offsets_limit"]): + assert item == expected_offsets_limit[index] + + +def test_jieba_with_offsets_2(): + """Test add_word""" + DATA_FILE4 = "../data/dataset/testJiebaDataset/4.txt" + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) + jieba_op.add_word("男默女泪") + expect = ['男默女泪', '市', '长江大桥'] + data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + columns_order=["token", "offsets_start", "offsets_limit"], + operations=jieba_op, num_parallel_workers=2) + expected_offsets_start = [0, 12, 15] + expected_offsets_limit = [12, 15, 27] + for i in data.create_dict_iterator(): + ret = to_str(i["token"]) + for index, item in enumerate(ret): + assert item == expect[index] + for index, item in enumerate(i["offsets_start"]): + assert item == expected_offsets_start[index] + for index, item in enumerate(i["offsets_limit"]): + assert item == expected_offsets_limit[index] + + +def test_jieba_with_offsets_2_1(): + """Test add_word with freq""" + DATA_FILE4 = "../data/dataset/testJiebaDataset/4.txt" 
+ data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) + jieba_op.add_word("男默女泪", 10) + data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + columns_order=["token", "offsets_start", "offsets_limit"], + operations=jieba_op, num_parallel_workers=2) + expect = ['男默女泪', '市', '长江大桥'] + expected_offsets_start = [0, 12, 15] + expected_offsets_limit = [12, 15, 27] + for i in data.create_dict_iterator(): + ret = to_str(i["token"]) + for index, item in enumerate(ret): + assert item == expect[index] + for index, item in enumerate(i["offsets_start"]): + assert item == expected_offsets_start[index] + for index, item in enumerate(i["offsets_limit"]): + assert item == expected_offsets_limit[index] + + +def test_jieba_with_offsets_2_2(): + """Test add_word with freq, the value of freq affects the result of segmentation""" + DATA_FILE4 = "../data/dataset/testJiebaDataset/6.txt" + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) + jieba_op.add_word("江大桥", 20000) + data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + columns_order=["token", "offsets_start", "offsets_limit"], + operations=jieba_op, num_parallel_workers=2) + expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] + expected_offsets_start = [0, 6, 12, 21, 27, 30, 42, 45, 51] + expected_offsets_limit = [6, 12, 21, 27, 30, 42, 45, 51, 57] + for i in data.create_dict_iterator(): + ret = to_str(i["token"]) + for index, item in enumerate(ret): + assert item == expect[index] + for index, item in enumerate(i["offsets_start"]): + assert item == expected_offsets_start[index] + for index, item in enumerate(i["offsets_limit"]): + assert item == expected_offsets_limit[index] + + +def test_jieba_with_offsets_3(): + """Test add_dict with dict""" + DATA_FILE4 = 
"../data/dataset/testJiebaDataset/4.txt" + user_dict = { + "男默女泪": 10 + } + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) + jieba_op.add_dict(user_dict) + data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + columns_order=["token", "offsets_start", "offsets_limit"], + operations=jieba_op, num_parallel_workers=1) + expect = ['男默女泪', '市', '长江大桥'] + expected_offsets_start = [0, 12, 15] + expected_offsets_limit = [12, 15, 27] + for i in data.create_dict_iterator(): + ret = to_str(i["token"]) + for index, item in enumerate(ret): + assert item == expect[index] + for index, item in enumerate(i["offsets_start"]): + assert item == expected_offsets_start[index] + for index, item in enumerate(i["offsets_limit"]): + assert item == expected_offsets_limit[index] + + +def test_jieba_with_offsets_3_1(): + """Test add_dict with dict""" + DATA_FILE4 = "../data/dataset/testJiebaDataset/4.txt" + user_dict = { + "男默女泪": 10, + "江大桥": 20000 + } + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) + jieba_op.add_dict(user_dict) + data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + columns_order=["token", "offsets_start", "offsets_limit"], + operations=jieba_op, num_parallel_workers=1) + expect = ['男默女泪', '市长', '江大桥'] + expected_offsets_start = [0, 12, 18] + expected_offsets_limit = [12, 18, 27] + for i in data.create_dict_iterator(): + ret = to_str(i["token"]) + for index, item in enumerate(ret): + assert item == expect[index] + for index, item in enumerate(i["offsets_start"]): + assert item == expected_offsets_start[index] + for index, item in enumerate(i["offsets_limit"]): + assert item == expected_offsets_limit[index] + + +def test_jieba_with_offsets_4(): + DATA_FILE4 = "../data/dataset/testJiebaDataset/3.txt" + DICT_FILE = 
"../data/dataset/testJiebaDataset/user_dict.txt" + + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) + jieba_op.add_dict(DICT_FILE) + data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + columns_order=["token", "offsets_start", "offsets_limit"], + operations=jieba_op, num_parallel_workers=1) + expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] + expected_offsets_start = [0, 12, 21, 27, 33, 36, 42] + expected_offsets_limit = [12, 21, 27, 33, 36, 42, 48] + for i in data.create_dict_iterator(): + ret = to_str(i["token"]) + for index, item in enumerate(ret): + assert item == expect[index] + for index, item in enumerate(i["offsets_start"]): + assert item == expected_offsets_start[index] + for index, item in enumerate(i["offsets_limit"]): + assert item == expected_offsets_limit[index] + + +def test_jieba_with_offsets_5(): + """Test add dict with file path""" + DATA_FILE4 = "../data/dataset/testJiebaDataset/6.txt" + + data = ds.TextFileDataset(DATA_FILE4) + jieba_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True) + jieba_op.add_word("江大桥", 20000) + data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], + columns_order=["token", "offsets_start", "offsets_limit"], + operations=jieba_op, num_parallel_workers=1) + expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] + expected_offsets_start = [0, 6, 12, 21, 27, 30, 42, 45, 51] + expected_offsets_limit = [6, 12, 21, 27, 30, 42, 45, 51, 57] + for i in data.create_dict_iterator(): + ret = to_str(i["token"]) + for index, item in enumerate(ret): + assert item == expect[index] + for index, item in enumerate(i["offsets_start"]): + assert item == expected_offsets_start[index] + for index, item in enumerate(i["offsets_limit"]): + assert item == expected_offsets_limit[index] + +def gen(): + text = 
np.array("今天天气太好了我们一起去外面玩吧".encode("UTF8"), dtype='S') + yield (text,) + + +def pytoken_op(input_data): + te = str(to_str(input_data)) + tokens = [] + tokens.append(te[:5].encode("UTF8")) + tokens.append(te[5:10].encode("UTF8")) + tokens.append(te[10:].encode("UTF8")) + return np.array(tokens, dtype='S') + + +def test_jieba_6(): + data = ds.GeneratorDataset(gen, column_names=["text"]) + data = data.map(input_columns=["text"], + operations=pytoken_op, num_parallel_workers=1) + expect = ['今天天气太', '好了我们一', '起去外面玩吧'] + for i in data.create_dict_iterator(): + ret = to_str(i["text"]) + for index, item in enumerate(ret): + assert item == expect[index] + + +if __name__ == "__main__": + test_jieba_1() + test_jieba_1_1() + test_jieba_1_2() + test_jieba_2() + test_jieba_2_1() + test_jieba_2_2() + test_jieba_3() + test_jieba_3_1() + test_jieba_4() + test_jieba_4_1() + test_jieba_5() + test_jieba_5() + test_jieba_6() + test_jieba_with_offsets_1() + test_jieba_with_offsets_1_1() + test_jieba_with_offsets_1_2() + test_jieba_with_offsets_2() + test_jieba_with_offsets_2_1() + test_jieba_with_offsets_2_2() + test_jieba_with_offsets_3() + test_jieba_with_offsets_3_1() + test_jieba_with_offsets_4() + test_jieba_with_offsets_5() diff --git a/tests/ut/python/dataset/test_text_tokenizer.py b/tests/ut/python/dataset/test_text_tokenizer.py new file mode 100644 index 00000000000..2e2b7b741dd --- /dev/null +++ b/tests/ut/python/dataset/test_text_tokenizer.py @@ -0,0 +1,380 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +Testing UnicodeCharTokenizer op in DE +""" +import numpy as np +import mindspore.dataset as ds +from mindspore import log as logger +import mindspore.dataset.text as text + +DATA_FILE = "../data/dataset/testTokenizerData/1.txt" +NORMALIZE_FILE = "../data/dataset/testTokenizerData/normalize.txt" +REGEX_REPLACE_FILE = "../data/dataset/testTokenizerData/regex_replace.txt" +REGEX_TOKENIZER_FILE = "../data/dataset/testTokenizerData/regex_tokenizer.txt" + + +def split_by_unicode_char(input_strs): + """ + Split utf-8 strings to unicode characters + """ + out = [] + for s in input_strs: + out.append([c for c in s]) + return out + + +def test_unicode_char_tokenizer_default(): + """ + Test UnicodeCharTokenizer + """ + input_strs = ("Welcome to Beijing!", "北京欢迎您!", "我喜欢English!", " ") + dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) + tokenizer = text.UnicodeCharTokenizer() + dataset = dataset.map(operations=tokenizer) + tokens = [] + for i in dataset.create_dict_iterator(): + token = text.to_str(i['text']).tolist() + tokens.append(token) + logger.info("The out tokens is : {}".format(tokens)) + assert split_by_unicode_char(input_strs) == tokens + + +def test_unicode_char_tokenizer_with_offsets(): + """ + Test UnicodeCharTokenizer + """ + input_strs = ("Welcome to Beijing!", "北京欢迎您!", "我喜欢English!", " ") + dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) + tokenizer = text.UnicodeCharTokenizer(with_offsets=True) + dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'], + columns_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer) + tokens = [] + expected_offsets_start = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], + [0, 3, 6, 9, 12, 15], [0, 3, 6, 9, 10, 11, 12, 13, 14, 15, 16], 
[0, 1]] + expected_offsets_limit = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], + [3, 6, 9, 12, 15, 18], [3, 6, 9, 10, 11, 12, 13, 14, 15, 16, 17], [1, 2]] + count = 0 + for i in dataset.create_dict_iterator(): + token = text.to_str(i['token']).tolist() + tokens.append(token) + np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count]) + np.testing.assert_array_equal(i['offsets_limit'], expected_offsets_limit[count]) + count += 1 + logger.info("The out tokens is : {}".format(tokens)) + assert split_by_unicode_char(input_strs) == tokens + + +def test_whitespace_tokenizer_default(): + """ + Test WhitespaceTokenizer + """ + whitespace_strs = [["Welcome", "to", "Beijing!"], + ["北京欢迎您!"], + ["我喜欢English!"], + [""]] + dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) + tokenizer = text.WhitespaceTokenizer() + dataset = dataset.map(operations=tokenizer) + tokens = [] + for i in dataset.create_dict_iterator(): + token = text.to_str(i['text']).tolist() + tokens.append(token) + logger.info("The out tokens is : {}".format(tokens)) + assert whitespace_strs == tokens + + +def test_whitespace_tokenizer_with_offsets(): + """ + Test WhitespaceTokenizer + """ + whitespace_strs = [["Welcome", "to", "Beijing!"], + ["北京欢迎您!"], + ["我喜欢English!"], + [""]] + dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) + tokenizer = text.WhitespaceTokenizer(with_offsets=True) + dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'], + columns_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer) + tokens = [] + expected_offsets_start = [[0, 8, 11], [0], [0], [0]] + expected_offsets_limit = [[7, 10, 19], [18], [17], [0]] + count = 0 + for i in dataset.create_dict_iterator(): + token = text.to_str(i['token']).tolist() + tokens.append(token) + np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count]) + np.testing.assert_array_equal(i['offsets_limit'], 
expected_offsets_limit[count]) + count += 1 + + logger.info("The out tokens is : {}".format(tokens)) + assert whitespace_strs == tokens + + +def test_unicode_script_tokenizer_default(): + """ + Test UnicodeScriptTokenizer when para keep_whitespace=False + """ + unicode_script_strs = [["Welcome", "to", "Beijing", "!"], + ["北京欢迎您", "!"], + ["我喜欢", "English", "!"], + [""]] + dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) + tokenizer = text.UnicodeScriptTokenizer(keep_whitespace=False) + dataset = dataset.map(operations=tokenizer) + + tokens = [] + for i in dataset.create_dict_iterator(): + token = text.to_str(i['text']).tolist() + tokens.append(token) + logger.info("The out tokens is : {}".format(tokens)) + assert unicode_script_strs == tokens + + +def test_unicode_script_tokenizer_default2(): + """ + Test UnicodeScriptTokenizer when para keep_whitespace=True + """ + unicode_script_strs2 = [["Welcome", " ", "to", " ", "Beijing", "!"], + ["北京欢迎您", "!"], + ["我喜欢", "English", "!"], + [" "]] + dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) + tokenizer = text.UnicodeScriptTokenizer(keep_whitespace=True) + dataset = dataset.map(operations=tokenizer) + tokens = [] + for i in dataset.create_dict_iterator(): + token = text.to_str(i['text']).tolist() + tokens.append(token) + logger.info("The out tokens is :", tokens) + assert unicode_script_strs2 == tokens + + +def test_unicode_script_tokenizer_with_offsets(): + """ + Test UnicodeScriptTokenizer when para keep_whitespace=False and with_offsets=True + """ + unicode_script_strs = [["Welcome", "to", "Beijing", "!"], + ["北京欢迎您", "!"], + ["我喜欢", "English", "!"], + [""]] + dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) + tokenizer = text.UnicodeScriptTokenizer(keep_whitespace=False, with_offsets=True) + dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'], + columns_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer) + tokens = [] + 
expected_offsets_start = [[0, 8, 11, 18], [0, 15], [0, 9, 16], [0]] + expected_offsets_limit = [[7, 10, 18, 19], [15, 18], [9, 16, 17], [0]] + count = 0 + for i in dataset.create_dict_iterator(): + token = text.to_str(i['token']).tolist() + tokens.append(token) + np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count]) + np.testing.assert_array_equal(i['offsets_limit'], expected_offsets_limit[count]) + count += 1 + logger.info("The out tokens is : {}".format(tokens)) + assert unicode_script_strs == tokens + + +def test_unicode_script_tokenizer_with_offsets2(): + """ + Test UnicodeScriptTokenizer when para keep_whitespace=True and with_offsets=True + """ + unicode_script_strs2 = [["Welcome", " ", "to", " ", "Beijing", "!"], + ["北京欢迎您", "!"], + ["我喜欢", "English", "!"], + [" "]] + dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) + tokenizer = text.UnicodeScriptTokenizer(keep_whitespace=True, with_offsets=True) + dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'], + columns_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer) + tokens = [] + expected_offsets_start = [[0, 7, 8, 10, 11, 18], [0, 15], [0, 9, 16], [0]] + expected_offsets_limit = [[7, 8, 10, 11, 18, 19], [15, 18], [9, 16, 17], [2]] + count = 0 + for i in dataset.create_dict_iterator(): + token = text.to_str(i['token']).tolist() + tokens.append(token) + np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count]) + np.testing.assert_array_equal(i['offsets_limit'], expected_offsets_limit[count]) + count += 1 + logger.info("The out tokens is :", tokens) + assert unicode_script_strs2 == tokens + + +def test_case_fold(): + """ + Test CaseFold + """ + expect_strs = ["welcome to beijing!", "北京欢迎您!", "我喜欢english!", " "] + dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) + op = text.CaseFold() + dataset = dataset.map(operations=op) + + lower_strs = [] + for i in 
dataset.create_dict_iterator(): + token = text.to_str(i['text']).tolist() + lower_strs.append(token) + assert lower_strs == expect_strs + + +def test_normalize_utf8(): + """ + Test NormalizeUTF8 + """ + + def normalize(normalize_form): + dataset = ds.TextFileDataset(NORMALIZE_FILE, shuffle=False) + normalize = text.NormalizeUTF8(normalize_form=normalize_form) + dataset = dataset.map(operations=normalize) + out_bytes = [] + out_texts = [] + for i in dataset.create_dict_iterator(): + out_bytes.append(i['text']) + out_texts.append(text.to_str(i['text']).tolist()) + logger.info("The out bytes is : ", out_bytes) + logger.info("The out texts is: ", out_texts) + return out_bytes + + expect_normlize_data = [ + # NFC + [b'\xe1\xb9\xa9', b'\xe1\xb8\x8d\xcc\x87', b'q\xcc\xa3\xcc\x87', + b'\xef\xac\x81', b'2\xe2\x81\xb5', b'\xe1\xba\x9b\xcc\xa3'], + # NFKC + [b'\xe1\xb9\xa9', b'\xe1\xb8\x8d\xcc\x87', b'q\xcc\xa3\xcc\x87', + b'fi', b'25', b'\xe1\xb9\xa9'], + # NFD + [b's\xcc\xa3\xcc\x87', b'd\xcc\xa3\xcc\x87', b'q\xcc\xa3\xcc\x87', + b'\xef\xac\x81', b'2\xe2\x81\xb5', b'\xc5\xbf\xcc\xa3\xcc\x87'], + # NFKD + [b's\xcc\xa3\xcc\x87', b'd\xcc\xa3\xcc\x87', b'q\xcc\xa3\xcc\x87', + b'fi', b'25', b's\xcc\xa3\xcc\x87'] + ] + assert normalize(text.utils.NormalizeForm.NFC) == expect_normlize_data[0] + assert normalize(text.utils.NormalizeForm.NFKC) == expect_normlize_data[1] + assert normalize(text.utils.NormalizeForm.NFD) == expect_normlize_data[2] + assert normalize(text.utils.NormalizeForm.NFKD) == expect_normlize_data[3] + + +def test_regex_replace(): + """ + Test RegexReplace + """ + + def regex_replace(first, last, expect_str, pattern, replace): + dataset = ds.TextFileDataset(REGEX_REPLACE_FILE, shuffle=False) + if first > 1: + dataset = dataset.skip(first - 1) + if last >= first: + dataset = dataset.take(last - first + 1) + replace_op = text.RegexReplace(pattern, replace) + dataset = dataset.map(operations=replace_op) + out_text = [] + for i in dataset.create_dict_iterator(): + 
token = text.to_str(i['text']).tolist() + out_text.append(token) + logger.info("Out:", out_text) + logger.info("Exp:", expect_str) + assert expect_str == out_text + + regex_replace(1, 2, ['H____ W____', "L__'_ G_"], "\\p{Ll}", '_') + regex_replace(3, 5, ['hello', 'world', '31:beijing'], "^(\\d:|b:)", "") + regex_replace(6, 6, ["WelcometoChina!"], "\\s+", "") + regex_replace(7, 8, ['我不想长大', 'WelcometoShenzhen!'], "\\p{Cc}|\\p{Cf}|\\s+", "") + + +def test_regex_tokenizer_default(): + """ + Test RegexTokenizer + """ + + def regex_tokenizer(first, last, expect_str, delim_pattern, keep_delim_pattern): + dataset = ds.TextFileDataset(REGEX_TOKENIZER_FILE, shuffle=False) + if first > 1: + dataset = dataset.skip(first - 1) + if last >= first: + dataset = dataset.take(last - first + 1) + tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern) + dataset = dataset.map(operations=tokenizer_op) + out_text = [] + count = 0 + for i in dataset.create_dict_iterator(): + token = text.to_str(i['text']).tolist() + np.testing.assert_array_equal(token, expect_str[count]) + count += 1 + out_text.append(token) + logger.info("Out:", out_text) + logger.info("Exp:", expect_str) + + regex_tokenizer(1, 1, [['Welcome', 'to', 'Shenzhen!']], "\\s+", "") + regex_tokenizer(1, 1, [['Welcome', ' ', 'to', ' ', 'Shenzhen!']], "\\s+", "\\s+") + regex_tokenizer(2, 2, [['北', '京', '欢', '迎', '您', '!Welcome to Beijing!']], r"\p{Han}", r"\p{Han}") + regex_tokenizer(3, 3, [['12', '¥+', '36', '¥=?']], r"[\p{P}|\p{S}]+", r"[\p{P}|\p{S}]+") + regex_tokenizer(3, 3, [['12', '36']], r"[\p{P}|\p{S}]+", "") + regex_tokenizer(3, 3, [['¥+', '¥=?']], r"[\p{N}]+", "") + + +def test_regex_tokenizer_with_offsets(): + """ + Test RegexTokenizer + """ + + def regex_tokenizer(first, last, expect_str, expected_offsets_start, expected_offsets_limit, delim_pattern, + keep_delim_pattern): + dataset = ds.TextFileDataset(REGEX_TOKENIZER_FILE, shuffle=False) + if first > 1: + dataset = dataset.skip(first - 1) + if last >= 
first: + dataset = dataset.take(last - first + 1) + tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern, with_offsets=True) + dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'], + columns_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer_op) + out_text = [] + count = 0 + for i in dataset.create_dict_iterator(): + token = text.to_str(i['token']).tolist() + np.testing.assert_array_equal(token, expect_str[count]) + np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count]) + np.testing.assert_array_equal(i['offsets_limit'], expected_offsets_limit[count]) + count += 1 + out_text.append(token) + logger.info("Out:", out_text) + logger.info("Exp:", expect_str) + + regex_tokenizer(1, 1, [['Welcome', 'to', 'Shenzhen!']], [[0, 8, 11]], [[7, 10, 20]], "\\s+", "") + regex_tokenizer(1, 1, [['Welcome', ' ', 'to', ' ', 'Shenzhen!']], [[0, 7, 8, 10, 11]], [[7, 8, 10, 11, 20]], + "\\s+", "\\s+") + regex_tokenizer(2, 2, [['北', '京', '欢', '迎', '您', '!Welcome to Beijing!']], [[0, 3, 6, 9, 12, 15]], + [[3, 6, 9, 12, 15, 35]], r"\p{Han}", r"\p{Han}") + regex_tokenizer(3, 3, [['12', '¥+', '36', '¥=?']], [[0, 2, 6, 8]], [[2, 6, 8, 13]], + r"[\p{P}|\p{S}]+", r"[\p{P}|\p{S}]+") + regex_tokenizer(3, 3, [['12', '36']], [[0, 6]], [[2, 8]], r"[\p{P}|\p{S}]+", "") + regex_tokenizer(3, 3, [['¥+', '¥=?']], [[2, 8]], [[6, 13]], r"[\p{N}]+", "") + + +if __name__ == '__main__': + test_unicode_char_tokenizer_default() + test_unicode_char_tokenizer_with_offsets() + test_whitespace_tokenizer_default() + test_whitespace_tokenizer_with_offsets() + test_unicode_script_tokenizer_default() + test_unicode_script_tokenizer_default2() + test_unicode_script_tokenizer_with_offsets() + test_unicode_script_tokenizer_with_offsets2() + test_case_fold() + test_normalize_utf8() + test_regex_replace() + test_regex_tokenizer_default() + test_regex_tokenizer_with_offsets() diff --git 
a/tests/ut/python/dataset/test_text_wordpiece_tokenizer.py b/tests/ut/python/dataset/test_text_wordpiece_tokenizer.py new file mode 100644 index 00000000000..8b47ec971ef --- /dev/null +++ b/tests/ut/python/dataset/test_text_wordpiece_tokenizer.py @@ -0,0 +1,160 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +Testing WordpieceTokenizer op in DE +""" +import numpy as np +import mindspore.dataset as ds +from mindspore import log as logger +import mindspore.dataset.text as text + +WORDPIECE_TOKENIZER_FILE = "../data/dataset/testTokenizerData/wordpiece_tokenizer.txt" + +vocab_english = [ + "book", "cholera", "era", "favor", "##ite", "my", "is", "love", "dur", "##ing", "the" +] + +vocab_chinese = [ + "我", '最', '喜', '欢', '的', '书', '是', '霍', '乱', '时', '期', '爱', '情' +] + +vocab_mix = vocab_chinese + vocab_english + +test_paras = [ + dict( + first=1, + last=10, + expect_str=[['my'], ['favor', '##ite'], ['book'], ['is'], ['love'], ['dur', '##ing'], ['the'], ['cholera'], + ['era'], ['[UNK]']], + expected_offsets_start=[[0], [0, 5], [0], [0], [0], [0, 3], [0], [0], [0], [0]], + expected_offsets_limit=[[2], [5, 8], [4], [2], [4], [3, 6], [3], [7], [3], [4]], + vocab_list=vocab_english + ), + dict( + first=1, + last=10, + expect_str=[['my'], ['favor', '##ite'], ['book'], ['is'], ['love'], ['dur', '##ing'], ['the'], ['cholera'], + ['era'], 
['what']], + expected_offsets_start=[[0], [0, 5], [0], [0], [0], [0, 3], [0], [0], [0], [0]], + expected_offsets_limit=[[2], [5, 8], [4], [2], [4], [3, 6], [3], [7], [3], [4]], + vocab_list=vocab_english, + unknown_token="" + ), + dict( + first=1, + last=10, + expect_str=[['my'], ['[UNK]'], ['book'], ['is'], ['love'], ['[UNK]'], ['the'], ['[UNK]'], ['era'], ['[UNK]']], + expected_offsets_start=[[0], [0], [0], [0], [0], [0], [0], [0], [0], [0]], + expected_offsets_limit=[[2], [5], [4], [2], [4], [5], [3], [5], [3], [4]], + vocab_list=vocab_english, + max_bytes_per_token=4 + ), + dict( + first=11, + last=25, + expect_str=[['我'], ['最'], ['喜'], ['欢'], ['的'], ['书'], ['是'], ['霍'], ['乱'], ['时'], ['期'], ['的'], ['爱'], ['情'], + ['[UNK]']], + expected_offsets_start=[[0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0]], + expected_offsets_limit=[[3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3]], + vocab_list=vocab_chinese, + ), + dict( + first=25, + last=25, + expect_str=[['您']], + expected_offsets_start=[[0]], + expected_offsets_limit=[[3]], + vocab_list=vocab_chinese, + unknown_token="" + ), + dict( + first=1, + last=25, + expect_str=[ + ['my'], ['favor', '##ite'], ['book'], ['is'], ['love'], ['dur', '##ing'], ['the'], ['cholera'], ['era'], + ['[UNK]'], + ['我'], ['最'], ['喜'], ['欢'], ['的'], ['书'], ['是'], ['霍'], ['乱'], ['时'], ['期'], ['的'], ['爱'], ['情'], + ['[UNK]']], + expected_offsets_start=[[0], [0, 5], [0], [0], [0], [0, 3], [0], [0], [0], [0], + [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0]], + expected_offsets_limit=[[2], [5, 8], [4], [2], [4], [3, 6], [3], [7], [3], [4], + [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3], [3]], + vocab_list=vocab_mix, + ), +] + + +def check_wordpiece_tokenizer_default(first, last, expect_str, expected_offsets_start, expected_offsets_limit, + vocab_list, unknown_token='[UNK]', max_bytes_per_token=100): + dataset = 
ds.TextFileDataset(WORDPIECE_TOKENIZER_FILE, shuffle=False) + if first > 1: + dataset = dataset.skip(first - 1) + if last >= first: + dataset = dataset.take(last - first + 1) + vocab = text.Vocab.from_list(vocab_list) + tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token=unknown_token, + max_bytes_per_token=max_bytes_per_token) + dataset = dataset.map(operations=tokenizer_op) + count = 0 + for i in dataset.create_dict_iterator(): + token = text.to_str(i['text']) + logger.info("Out:", token) + logger.info("Exp:", expect_str[count]) + np.testing.assert_array_equal(token, expect_str[count]) + count = count + 1 + + +def check_wordpiece_tokenizer_with_offsets(first, last, expect_str, expected_offsets_start, expected_offsets_limit, + vocab_list, unknown_token='[UNK]', max_bytes_per_token=100): + dataset = ds.TextFileDataset(WORDPIECE_TOKENIZER_FILE, shuffle=False) + if first > 1: + dataset = dataset.skip(first - 1) + if last >= first: + dataset = dataset.take(last - first + 1) + vocab = text.Vocab.from_list(vocab_list) + tokenizer_op = text.WordpieceTokenizer(vocab=vocab, with_offsets=True, unknown_token=unknown_token, + max_bytes_per_token=max_bytes_per_token) + dataset = dataset.map(input_columns=['text'], output_columns=['token', 'offsets_start', 'offsets_limit'], + columns_order=['token', 'offsets_start', 'offsets_limit'], operations=tokenizer_op) + count = 0 + for i in dataset.create_dict_iterator(): + token = text.to_str(i['token']) + logger.info("Out:", token) + logger.info("Exp:", expect_str[count]) + np.testing.assert_array_equal(token, expect_str[count]) + np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count]) + np.testing.assert_array_equal(i['offsets_limit'], expected_offsets_limit[count]) + count = count + 1 + + +def test_wordpiece_tokenizer_default(): + """ + Test WordpieceTokenizer + """ + for paras in test_paras: + check_wordpiece_tokenizer_default(**paras) + + +def test_wordpiece_tokenizer_with_offsets(): + """ + 
Test WordpieceTokenizer + """ + for paras in test_paras: + check_wordpiece_tokenizer_with_offsets(**paras) + + +if __name__ == '__main__': + test_wordpiece_tokenizer_default() + test_wordpiece_tokenizer_with_offsets() diff --git a/tests/ut/python/dataset/test_tfreader_op.py b/tests/ut/python/dataset/test_tfreader_op.py index 5948b1e4c10..f57c387b356 100644 --- a/tests/ut/python/dataset/test_tfreader_op.py +++ b/tests/ut/python/dataset/test_tfreader_op.py @@ -12,21 +12,30 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== +""" +Test TFRecordDataset Ops +""" import numpy as np import pytest -from util import save_and_check import mindspore.common.dtype as mstype import mindspore.dataset as ds from mindspore import log as logger +from util import save_and_check_dict FILES = ["../data/dataset/testTFTestAllTypes/test.data"] DATASET_ROOT = "../data/dataset/testTFTestAllTypes/" SCHEMA_FILE = "../data/dataset/testTFTestAllTypes/datasetSchema.json" +DATA_FILES2 = ["../data/dataset/test_tf_file_3_images2/train-0000-of-0001.data", + "../data/dataset/test_tf_file_3_images2/train-0000-of-0002.data", + "../data/dataset/test_tf_file_3_images2/train-0000-of-0003.data", + "../data/dataset/test_tf_file_3_images2/train-0000-of-0004.data"] +SCHEMA_FILE2 = "../data/dataset/test_tf_file_3_images2/datasetSchema.json" GENERATE_GOLDEN = False -def test_case_tf_shape(): +def test_tfrecord_shape(): + logger.info("test_tfrecord_shape") schema_file = "../data/dataset/testTFTestAllTypes/datasetSchemaRank0.json" ds1 = ds.TFRecordDataset(FILES, schema_file) ds1 = ds1.batch(2) @@ -36,7 +45,8 @@ def test_case_tf_shape(): assert len(output_shape[-1]) == 1 -def test_case_tf_read_all_dataset(): +def test_tfrecord_read_all_dataset(): + logger.info("test_tfrecord_read_all_dataset") schema_file = "../data/dataset/testTFTestAllTypes/datasetSchemaNoRow.json" ds1 = 
ds.TFRecordDataset(FILES, schema_file) assert ds1.get_dataset_size() == 12 @@ -46,7 +56,8 @@ def test_case_tf_read_all_dataset(): assert count == 12 -def test_case_num_samples(): +def test_tfrecord_num_samples(): + logger.info("test_tfrecord_num_samples") schema_file = "../data/dataset/testTFTestAllTypes/datasetSchema7Rows.json" ds1 = ds.TFRecordDataset(FILES, schema_file, num_samples=8) assert ds1.get_dataset_size() == 8 @@ -56,7 +67,8 @@ def test_case_num_samples(): assert count == 8 -def test_case_num_samples2(): +def test_tfrecord_num_samples2(): + logger.info("test_tfrecord_num_samples2") schema_file = "../data/dataset/testTFTestAllTypes/datasetSchema7Rows.json" ds1 = ds.TFRecordDataset(FILES, schema_file) assert ds1.get_dataset_size() == 7 @@ -66,42 +78,41 @@ def test_case_num_samples2(): assert count == 7 -def test_case_tf_shape_2(): +def test_tfrecord_shape2(): + logger.info("test_tfrecord_shape2") ds1 = ds.TFRecordDataset(FILES, SCHEMA_FILE) ds1 = ds1.batch(2) output_shape = ds1.output_shapes() assert len(output_shape[-1]) == 2 -def test_case_tf_file(): - logger.info("reading data from: {}".format(FILES[0])) - parameters = {"params": {}} +def test_tfrecord_files_basic(): + logger.info("test_tfrecord_files_basic") data = ds.TFRecordDataset(FILES, SCHEMA_FILE, shuffle=ds.Shuffle.FILES) - filename = "tfreader_result.npz" - save_and_check(data, parameters, filename, generate_golden=GENERATE_GOLDEN) + filename = "tfrecord_files_basic.npz" + save_and_check_dict(data, filename, generate_golden=GENERATE_GOLDEN) -def test_case_tf_file_no_schema(): - logger.info("reading data from: {}".format(FILES[0])) - parameters = {"params": {}} +def test_tfrecord_no_schema(): + logger.info("test_tfrecord_no_schema") data = ds.TFRecordDataset(FILES, shuffle=ds.Shuffle.FILES) - filename = "tf_file_no_schema.npz" - save_and_check(data, parameters, filename, generate_golden=GENERATE_GOLDEN) + filename = "tfrecord_no_schema.npz" + save_and_check_dict(data, filename, 
generate_golden=GENERATE_GOLDEN) -def test_case_tf_file_pad(): - logger.info("reading data from: {}".format(FILES[0])) - parameters = {"params": {}} +def test_tfrecord_pad(): + logger.info("test_tfrecord_pad") schema_file = "../data/dataset/testTFTestAllTypes/datasetSchemaPadBytes10.json" data = ds.TFRecordDataset(FILES, schema_file, shuffle=ds.Shuffle.FILES) - filename = "tf_file_padBytes10.npz" - save_and_check(data, parameters, filename, generate_golden=GENERATE_GOLDEN) + filename = "tfrecord_pad_bytes10.npz" + save_and_check_dict(data, filename, generate_golden=GENERATE_GOLDEN) -def test_tf_files(): +def test_tfrecord_read_files(): + logger.info("test_tfrecord_read_files") pattern = DATASET_ROOT + "/test.data" data = ds.TFRecordDataset(pattern, SCHEMA_FILE, shuffle=ds.Shuffle.FILES) assert sum([1 for _ in data]) == 12 @@ -123,7 +134,19 @@ def test_tf_files(): assert sum([1 for _ in data]) == 24 -def test_tf_record_schema(): +def test_tfrecord_multi_files(): + logger.info("test_tfrecord_multi_files") + data1 = ds.TFRecordDataset(DATA_FILES2, SCHEMA_FILE2, shuffle=False) + data1 = data1.repeat(1) + num_iter = 0 + for _ in data1.create_dict_iterator(): + num_iter += 1 + + assert num_iter == 12 + + +def test_tfrecord_schema(): + logger.info("test_tfrecord_schema") schema = ds.Schema() schema.add_column('col_1d', de_type=mstype.int64, shape=[2]) schema.add_column('col_2d', de_type=mstype.int64, shape=[2, 2]) @@ -142,7 +165,8 @@ def test_tf_record_schema(): assert np.array_equal(t1, t2) -def test_tf_record_shuffle(): +def test_tfrecord_shuffle(): + logger.info("test_tfrecord_shuffle") ds.config.set_seed(1) data1 = ds.TFRecordDataset(FILES, schema=SCHEMA_FILE, shuffle=ds.Shuffle.GLOBAL) data2 = ds.TFRecordDataset(FILES, schema=SCHEMA_FILE, shuffle=ds.Shuffle.FILES) @@ -153,7 +177,8 @@ def test_tf_record_shuffle(): assert np.array_equal(t1, t2) -def test_tf_record_shard(): +def test_tfrecord_shard(): + logger.info("test_tfrecord_shard") tf_files = 
["../data/dataset/tf_file_dataset/test1.data", "../data/dataset/tf_file_dataset/test2.data", "../data/dataset/tf_file_dataset/test3.data", "../data/dataset/tf_file_dataset/test4.data"] @@ -181,7 +206,8 @@ def test_tf_record_shard(): assert set(worker2_res) == set(worker1_res) -def test_tf_shard_equal_rows(): +def test_tfrecord_shard_equal_rows(): + logger.info("test_tfrecord_shard_equal_rows") tf_files = ["../data/dataset/tf_file_dataset/test1.data", "../data/dataset/tf_file_dataset/test2.data", "../data/dataset/tf_file_dataset/test3.data", "../data/dataset/tf_file_dataset/test4.data"] @@ -209,7 +235,8 @@ def test_tf_shard_equal_rows(): assert len(worker4_res) == 40 -def test_case_tf_file_no_schema_columns_list(): +def test_tfrecord_no_schema_columns_list(): + logger.info("test_tfrecord_no_schema_columns_list") data = ds.TFRecordDataset(FILES, shuffle=False, columns_list=["col_sint16"]) row = data.create_dict_iterator().get_next() assert row["col_sint16"] == [-32768] @@ -219,7 +246,8 @@ def test_case_tf_file_no_schema_columns_list(): assert "col_sint32" in str(info.value) -def test_tf_record_schema_columns_list(): +def test_tfrecord_schema_columns_list(): + logger.info("test_tfrecord_schema_columns_list") schema = ds.Schema() schema.add_column('col_1d', de_type=mstype.int64, shape=[2]) schema.add_column('col_2d', de_type=mstype.int64, shape=[2, 2]) @@ -238,7 +266,8 @@ def test_tf_record_schema_columns_list(): assert "col_sint32" in str(info.value) -def test_case_invalid_files(): +def test_tfrecord_invalid_files(): + logger.info("test_tfrecord_invalid_files") valid_file = "../data/dataset/testTFTestAllTypes/test.data" invalid_file = "../data/dataset/testTFTestAllTypes/invalidFile.txt" files = [invalid_file, valid_file, SCHEMA_FILE] @@ -266,19 +295,20 @@ def test_case_invalid_files(): if __name__ == '__main__': - test_case_tf_shape() - test_case_tf_read_all_dataset() - test_case_num_samples() - test_case_num_samples2() - test_case_tf_shape_2() - test_case_tf_file() - 
test_case_tf_file_no_schema() - test_case_tf_file_pad() - test_tf_files() - test_tf_record_schema() - test_tf_record_shuffle() - test_tf_record_shard() - test_tf_shard_equal_rows() - test_case_tf_file_no_schema_columns_list() - test_tf_record_schema_columns_list() - test_case_invalid_files() + test_tfrecord_shape() + test_tfrecord_read_all_dataset() + test_tfrecord_num_samples() + test_tfrecord_num_samples2() + test_tfrecord_shape2() + test_tfrecord_files_basic() + test_tfrecord_no_schema() + test_tfrecord_pad() + test_tfrecord_read_files() + test_tfrecord_multi_files() + test_tfrecord_schema() + test_tfrecord_shuffle() + test_tfrecord_shard() + test_tfrecord_shard_equal_rows() + test_tfrecord_no_schema_columns_list() + test_tfrecord_schema_columns_list() + test_tfrecord_invalid_files() diff --git a/tests/ut/python/dataset/test_tokenizer.py b/tests/ut/python/dataset/test_tokenizer.py deleted file mode 100644 index 2ec988d8dcd..00000000000 --- a/tests/ut/python/dataset/test_tokenizer.py +++ /dev/null @@ -1,233 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -""" -Testing UnicodeCharTokenizer op in DE -""" -import numpy as np -import mindspore.dataset as ds -from mindspore import log as logger -import mindspore.dataset.text as nlp - -DATA_FILE = "../data/dataset/testTokenizerData/1.txt" -NORMALIZE_FILE = "../data/dataset/testTokenizerData/normalize.txt" -REGEX_REPLACE_FILE = "../data/dataset/testTokenizerData/regex_replace.txt" -REGEX_TOKENIZER_FILE = "../data/dataset/testTokenizerData/regex_tokenizer.txt" - - -def split_by_unicode_char(input_strs): - """ - Split utf-8 strings to unicode characters - """ - out = [] - for s in input_strs: - out.append([c for c in s]) - return out - - -def test_unicode_char_tokenizer(): - """ - Test UnicodeCharTokenizer - """ - input_strs = ("Welcome to Beijing!", "北京欢迎您!", "我喜欢English!", " ") - dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) - tokenizer = nlp.UnicodeCharTokenizer() - dataset = dataset.map(operations=tokenizer) - tokens = [] - for i in dataset.create_dict_iterator(): - text = nlp.to_str(i['text']).tolist() - tokens.append(text) - logger.info("The out tokens is : {}".format(tokens)) - assert split_by_unicode_char(input_strs) == tokens - - -def test_whitespace_tokenizer(): - """ - Test WhitespaceTokenizer - """ - whitespace_strs = [["Welcome", "to", "Beijing!"], - ["北京欢迎您!"], - ["我喜欢English!"], - [""]] - dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) - tokenizer = nlp.WhitespaceTokenizer() - dataset = dataset.map(operations=tokenizer) - tokens = [] - for i in dataset.create_dict_iterator(): - text = nlp.to_str(i['text']).tolist() - tokens.append(text) - logger.info("The out tokens is : {}".format(tokens)) - assert whitespace_strs == tokens - - -def test_unicode_script_tokenizer(): - """ - Test UnicodeScriptTokenizer when para keep_whitespace=False - """ - unicode_script_strs = [["Welcome", "to", "Beijing", "!"], - ["北京欢迎您", "!"], - ["我喜欢", "English", "!"], - [""]] - dataset = 
ds.TextFileDataset(DATA_FILE, shuffle=False) - tokenizer = nlp.UnicodeScriptTokenizer(keep_whitespace=False) - dataset = dataset.map(operations=tokenizer) - - tokens = [] - for i in dataset.create_dict_iterator(): - text = nlp.to_str(i['text']).tolist() - tokens.append(text) - logger.info("The out tokens is : {}".format(tokens)) - assert unicode_script_strs == tokens - - -def test_unicode_script_tokenizer2(): - """ - Test UnicodeScriptTokenizer when para keep_whitespace=True - """ - unicode_script_strs2 = [["Welcome", " ", "to", " ", "Beijing", "!"], - ["北京欢迎您", "!"], - ["我喜欢", "English", "!"], - [" "]] - dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) - tokenizer = nlp.UnicodeScriptTokenizer(keep_whitespace=True) - dataset = dataset.map(operations=tokenizer) - tokens = [] - for i in dataset.create_dict_iterator(): - text = nlp.to_str(i['text']).tolist() - tokens.append(text) - logger.info("The out tokens is :", tokens) - assert unicode_script_strs2 == tokens - - -def test_case_fold(): - """ - Test CaseFold - """ - expect_strs = ["welcome to beijing!", "北京欢迎您!", "我喜欢english!", " "] - dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) - op = nlp.CaseFold() - dataset = dataset.map(operations=op) - - lower_strs = [] - for i in dataset.create_dict_iterator(): - text = nlp.to_str(i['text']).tolist() - lower_strs.append(text) - assert lower_strs == expect_strs - - -def test_normalize_utf8(): - """ - Test NormalizeUTF8 - """ - - def normalize(normalize_form): - dataset = ds.TextFileDataset(NORMALIZE_FILE, shuffle=False) - normalize = nlp.NormalizeUTF8(normalize_form=normalize_form) - dataset = dataset.map(operations=normalize) - out_bytes = [] - out_texts = [] - for i in dataset.create_dict_iterator(): - out_bytes.append(i['text']) - out_texts.append(nlp.to_str(i['text']).tolist()) - logger.info("The out bytes is : ", out_bytes) - logger.info("The out texts is: ", out_texts) - return out_bytes - - expect_normlize_data = [ - # NFC - [b'\xe1\xb9\xa9', 
b'\xe1\xb8\x8d\xcc\x87', b'q\xcc\xa3\xcc\x87', - b'\xef\xac\x81', b'2\xe2\x81\xb5', b'\xe1\xba\x9b\xcc\xa3'], - # NFKC - [b'\xe1\xb9\xa9', b'\xe1\xb8\x8d\xcc\x87', b'q\xcc\xa3\xcc\x87', - b'fi', b'25', b'\xe1\xb9\xa9'], - # NFD - [b's\xcc\xa3\xcc\x87', b'd\xcc\xa3\xcc\x87', b'q\xcc\xa3\xcc\x87', - b'\xef\xac\x81', b'2\xe2\x81\xb5', b'\xc5\xbf\xcc\xa3\xcc\x87'], - # NFKD - [b's\xcc\xa3\xcc\x87', b'd\xcc\xa3\xcc\x87', b'q\xcc\xa3\xcc\x87', - b'fi', b'25', b's\xcc\xa3\xcc\x87'] - ] - assert normalize(nlp.utils.NormalizeForm.NFC) == expect_normlize_data[0] - assert normalize(nlp.utils.NormalizeForm.NFKC) == expect_normlize_data[1] - assert normalize(nlp.utils.NormalizeForm.NFD) == expect_normlize_data[2] - assert normalize(nlp.utils.NormalizeForm.NFKD) == expect_normlize_data[3] - - -def test_regex_replace(): - """ - Test RegexReplace - """ - - def regex_replace(first, last, expect_str, pattern, replace): - dataset = ds.TextFileDataset(REGEX_REPLACE_FILE, shuffle=False) - if first > 1: - dataset = dataset.skip(first - 1) - if last >= first: - dataset = dataset.take(last - first + 1) - replace_op = nlp.RegexReplace(pattern, replace) - dataset = dataset.map(operations=replace_op) - out_text = [] - for i in dataset.create_dict_iterator(): - text = nlp.to_str(i['text']).tolist() - out_text.append(text) - logger.info("Out:", out_text) - logger.info("Exp:", expect_str) - assert expect_str == out_text - - regex_replace(1, 2, ['H____ W____', "L__'_ G_"], "\\p{Ll}", '_') - regex_replace(3, 5, ['hello', 'world', '31:beijing'], "^(\\d:|b:)", "") - regex_replace(6, 6, ["WelcometoChina!"], "\\s+", "") - regex_replace(7, 8, ['我不想长大', 'WelcometoShenzhen!'], "\\p{Cc}|\\p{Cf}|\\s+", "") - - -def test_regex_tokenizer(): - """ - Test RegexTokenizer - """ - - def regex_tokenizer(first, last, expect_str, delim_pattern, keep_delim_pattern): - dataset = ds.TextFileDataset(REGEX_TOKENIZER_FILE, shuffle=False) - if first > 1: - dataset = dataset.skip(first - 1) - if last >= first: - dataset = 
dataset.take(last - first + 1) - tokenizer_op = nlp.RegexTokenizer(delim_pattern, keep_delim_pattern) - dataset = dataset.map(operations=tokenizer_op) - out_text = [] - count = 0 - for i in dataset.create_dict_iterator(): - text = nlp.to_str(i['text']).tolist() - np.testing.assert_array_equal(text, expect_str[count]) - count += 1 - out_text.append(text) - logger.info("Out:", out_text) - logger.info("Exp:", expect_str) - - regex_tokenizer(1, 1, [['Welcome', 'to', 'Shenzhen!']], "\\s+", "") - regex_tokenizer(1, 1, [['Welcome', ' ', 'to', ' ', 'Shenzhen!']], "\\s+", "\\s+") - regex_tokenizer(2, 2, [['北', '京', '欢', '迎', '您', '!Welcome to Beijing!']], r"\p{Han}", r"\p{Han}") - regex_tokenizer(3, 3, [['12', '¥+', '36', '¥=?']], r"[\p{P}|\p{S}]+", r"[\p{P}|\p{S}]+") - regex_tokenizer(3, 3, [['12', '36']], r"[\p{P}|\p{S}]+", "") - regex_tokenizer(3, 3, [['¥+', '¥=?']], r"[\p{N}]+", "") - - -if __name__ == '__main__': - test_unicode_char_tokenizer() - test_whitespace_tokenizer() - test_unicode_script_tokenizer() - test_unicode_script_tokenizer2() - test_case_fold() - test_normalize_utf8() - test_regex_replace() - test_regex_tokenizer() diff --git a/tests/ut/python/dataset/test_uniform_augment.py b/tests/ut/python/dataset/test_uniform_augment.py index a26b6472656..e5b66696eaf 100644 --- a/tests/ut/python/dataset/test_uniform_augment.py +++ b/tests/ut/python/dataset/test_uniform_augment.py @@ -16,6 +16,7 @@ Testing UniformAugment in DE """ import numpy as np +import pytest import mindspore.dataset.engine as de import mindspore.dataset.transforms.vision.c_transforms as C @@ -164,12 +165,13 @@ def test_cpp_uniform_augment_exception_pyops(num_ops=2): C.RandomRotation(degrees=45), F.Invert()] - try: + with pytest.raises(TypeError) as e: _ = C.UniformAugment(operations=transforms_ua, num_ops=num_ops) - except Exception as e: - logger.info("Got an exception in DE: {}".format(str(e))) - assert "operations" in str(e) + logger.info("Got an exception in DE: {}".format(str(e))) + assert 
"Argument tensor_op_5 with value" \ + " ,)" in str(e.value) def test_cpp_uniform_augment_exception_large_numops(num_ops=6): @@ -209,7 +211,7 @@ def test_cpp_uniform_augment_exception_nonpositive_numops(num_ops=0): except Exception as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "num_ops" in str(e) + assert "Input num_ops must be greater than 0" in str(e) def test_cpp_uniform_augment_exception_float_numops(num_ops=2.5): @@ -229,7 +231,7 @@ def test_cpp_uniform_augment_exception_float_numops(num_ops=2.5): except Exception as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "integer" in str(e) + assert "Argument num_ops with value 2.5 is not of type (,)" in str(e) def test_cpp_uniform_augment_random_crop_badinput(num_ops=1): diff --git a/tests/ut/python/dataset/test_vocab.py b/tests/ut/python/dataset/test_vocab.py index 35411e5c80e..05451813601 100644 --- a/tests/ut/python/dataset/test_vocab.py +++ b/tests/ut/python/dataset/test_vocab.py @@ -26,7 +26,7 @@ SIMPLE_VOCAB_FILE = "../data/dataset/testVocab/simple_vocab_list.txt" def test_from_list_tutorial(): vocab = text.Vocab.from_list("home IS behind the world ahead !".split(" "), ["", ""], True) - lookup = text.Lookup(vocab) + lookup = text.Lookup(vocab, "") data = ds.TextFileDataset(DATA_FILE, shuffle=False) data = data.map(input_columns=["text"], operations=lookup) ind = 0 @@ -50,7 +50,7 @@ def test_from_file_tutorial(): def test_from_dict_tutorial(): vocab = text.Vocab.from_dict({"home": 3, "behind": 2, "the": 4, "world": 5, "": 6}) - lookup = text.Lookup(vocab, 6) # default value is -1 + lookup = text.Lookup(vocab, "") # any unknown token will be mapped to the id of data = ds.TextFileDataset(DATA_FILE, shuffle=False) data = data.map(input_columns=["text"], operations=lookup) res = [3, 6, 2, 4, 5, 6] @@ -60,33 +60,51 @@ def test_from_dict_tutorial(): ind += 1 +def test_from_dict_exception(): + try: + vocab = text.Vocab.from_dict({"home": -1, "behind": 0}) + if not vocab: + 
raise ValueError("Vocab is None") + except ValueError as e: + assert "is not within the required interval" in str(e) + + def test_from_list(): def gen(texts): for word in texts.split(" "): yield (np.array(word, dtype='S'),) - def test_config(lookup_str, vocab_input, special_tokens, special_first): + def test_config(lookup_str, vocab_input, special_tokens, special_first, unknown_token): try: vocab = text.Vocab.from_list(vocab_input, special_tokens, special_first) data = ds.GeneratorDataset(gen(lookup_str), column_names=["text"]) - data = data.map(input_columns=["text"], operations=text.Lookup(vocab)) + data = data.map(input_columns=["text"], operations=text.Lookup(vocab, unknown_token)) res = [] for d in data.create_dict_iterator(): res.append(d["text"].item()) return res - except ValueError as e: + except (ValueError, RuntimeError, TypeError) as e: return str(e) + # test basic default config, special_token=None, unknown_token=None + assert test_config("w1 w2 w3", ["w1", "w2", "w3"], None, True, None) == [0, 1, 2] # test normal operations - assert test_config("w1 w2 w3 s1 s2", ["w1", "w2", "w3"], ["s1", "s2"], True) == [2, 3, 4, 0, 1] - assert test_config("w1 w2 w3 s1 s2", ["w1", "w2", "w3"], ["s1", "s2"], False) == [0, 1, 2, 3, 4] - assert test_config("w3 w2 w1", ["w1", "w2", "w3"], None, True) == [2, 1, 0] - assert test_config("w3 w2 w1", ["w1", "w2", "w3"], None, False) == [2, 1, 0] + assert test_config("w1 w2 w3 s1 s2 ephemeral", ["w1", "w2", "w3"], ["s1", "s2"], True, "s2") == [2, 3, 4, 0, 1, 1] + assert test_config("w1 w2 w3 s1 s2", ["w1", "w2", "w3"], ["s1", "s2"], False, "s2") == [0, 1, 2, 3, 4] + assert test_config("w3 w2 w1", ["w1", "w2", "w3"], None, True, "w1") == [2, 1, 0] + assert test_config("w3 w2 w1", ["w1", "w2", "w3"], None, False, "w1") == [2, 1, 0] + # test unknown token lookup + assert test_config("w1 un1 w3 un2", ["w1", "w2", "w3"], ["", ""], True, "") == [2, 1, 4, 1] + assert test_config("w1 un1 w3 un2", ["w1", "w2", "w3"], ["", ""], False, 
"") == [0, 4, 2, 4] # test exceptions - assert "word_list contains duplicate" in test_config("w1", ["w1", "w1"], [], True) - assert "special_tokens contains duplicate" in test_config("w1", ["w1", "w2"], ["s1", "s1"], True) - assert "special_tokens and word_list contain duplicate" in test_config("w1", ["w1", "w2"], ["s1", "w1"], True) + assert "doesn't exist in vocab." in test_config("un1", ["w1"], [], False, "unk") + assert "doesn't exist in vocab and no unknown token is specified." in test_config("un1", ["w1"], [], False, None) + assert "doesn't exist in vocab" in test_config("un1", ["w1"], [], False, None) + assert "word_list contains duplicate" in test_config("w1", ["w1", "w1"], [], True, "w1") + assert "special_tokens contains duplicate" in test_config("w1", ["w1", "w2"], ["s1", "s1"], True, "w1") + assert "special_tokens and word_list contain duplicate" in test_config("w1", ["w1", "w2"], ["s1", "w1"], True, "w1") + assert "is not of type" in test_config("w1", ["w1", "w2"], ["s1"], True, 123) def test_from_file(): @@ -99,7 +117,7 @@ def test_from_file(): vocab = text.Vocab.from_file(SIMPLE_VOCAB_FILE, vocab_size=vocab_size, special_tokens=special_tokens, special_first=special_first) data = ds.GeneratorDataset(gen(lookup_str), column_names=["text"]) - data = data.map(input_columns=["text"], operations=text.Lookup(vocab)) + data = data.map(input_columns=["text"], operations=text.Lookup(vocab, "s2")) res = [] for d in data.create_dict_iterator(): res.append(d["text"].item()) @@ -118,6 +136,7 @@ def test_from_file(): if __name__ == '__main__': + test_from_dict_exception() test_from_list_tutorial() test_from_file_tutorial() test_from_dict_tutorial() diff --git a/tests/ut/python/dataset/test_wordpiece_tokenizer.py b/tests/ut/python/dataset/test_wordpiece_tokenizer.py deleted file mode 100644 index 79348847408..00000000000 --- a/tests/ut/python/dataset/test_wordpiece_tokenizer.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# 
Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -""" -Testing WordpieceTokenizer op in DE -""" -import numpy as np -import mindspore.dataset as ds -from mindspore import log as logger -import mindspore.dataset.text as nlp - -WORDPIECE_TOKENIZER_FILE = "../data/dataset/testTokenizerData/wordpiece_tokenizer.txt" - -vocab_english = [ - "book", "cholera", "era", "favor", "##ite", "my", "is", "love", "dur", "##ing", "the" -] - -vocab_chinese = [ - "我", '最', '喜', '欢', '的', '书', '是', '霍', '乱', '时', '期', '爱', '情' -] - -vocab_mix = vocab_chinese + vocab_english - -test_paras = [ - dict( - first=1, - last=10, - expect_str=[['my'], ['favor', '##ite'], ['book'], ['is'], ['love'], ['dur', '##ing'], ['the'], ['cholera'], - ['era'], ['[UNK]']], - vocab_list=vocab_english - ), - dict( - first=1, - last=10, - expect_str=[['my'], ['favor', '##ite'], ['book'], ['is'], ['love'], ['dur', '##ing'], ['the'], ['cholera'], - ['era'], ['what']], - vocab_list=vocab_english, - unknown_token="" - ), - dict( - first=1, - last=10, - expect_str=[['my'], ['[UNK]'], ['book'], ['is'], ['love'], ['[UNK]'], ['the'], ['[UNK]'], ['era'], ['[UNK]']], - vocab_list=vocab_english, - max_bytes_per_token=4 - ), - dict( - first=11, - last=25, - expect_str=[['我'], ['最'], ['喜'], ['欢'], ['的'], ['书'], ['是'], ['霍'], ['乱'], ['时'], ['期'], ['的'], ['爱'], ['情'], - ['[UNK]']], - vocab_list=vocab_chinese, - ), - dict( - first=25, - last=25, - 
expect_str=[['您']], - vocab_list=vocab_chinese, - unknown_token="" - ), - dict( - first=1, - last=25, - expect_str=[ - ['my'], ['favor', '##ite'], ['book'], ['is'], ['love'], ['dur', '##ing'], ['the'], ['cholera'], ['era'], - ['[UNK]'], - ['我'], ['最'], ['喜'], ['欢'], ['的'], ['书'], ['是'], ['霍'], ['乱'], ['时'], ['期'], ['的'], ['爱'], ['情'], - ['[UNK]']], - vocab_list=vocab_mix, - ), -] - - -def check_wordpiece_tokenizer(first, last, expect_str, vocab_list, unknown_token='[UNK]', max_bytes_per_token=100): - dataset = ds.TextFileDataset(WORDPIECE_TOKENIZER_FILE, shuffle=False) - if first > 1: - dataset = dataset.skip(first - 1) - if last >= first: - dataset = dataset.take(last - first + 1) - vocab = nlp.Vocab.from_list(vocab_list) - tokenizer_op = nlp.WordpieceTokenizer(vocab=vocab, unknown_token=unknown_token, - max_bytes_per_token=max_bytes_per_token) - dataset = dataset.map(operations=tokenizer_op) - count = 0 - for i in dataset.create_dict_iterator(): - text = nlp.to_str(i['text']) - logger.info("Out:", text) - logger.info("Exp:", expect_str[count]) - np.testing.assert_array_equal(text, expect_str[count]) - count = count + 1 - - -def test_wordpiece_tokenizer(): - """ - Test WordpieceTokenizer - """ - for paras in test_paras: - check_wordpiece_tokenizer(**paras) - - -if __name__ == '__main__': - test_wordpiece_tokenizer() diff --git a/tests/ut/python/dataset/util.py b/tests/ut/python/dataset/util.py index 2a8e93cd0be..11c57354065 100644 --- a/tests/ut/python/dataset/util.py +++ b/tests/ut/python/dataset/util.py @@ -288,12 +288,13 @@ def config_get_set_num_parallel_workers(num_parallel_workers_new): return num_parallel_workers_original -def visualize_with_bounding_boxes(orig, aug, plot_rows=3): +def visualize_with_bounding_boxes(orig, aug, annot_name="annotation", plot_rows=3): """ Take a list of un-augmented and augmented images with "annotation" bounding boxes Plot images to compare test correct BBox augment functionality :param orig: list of original images and bboxes 
(without aug) :param aug: list of augmented images and bboxes + :param annot_name: the dict key for bboxes in data, e.g "bbox" (COCO) / "annotation" (VOC) :param plot_rows: number of rows on plot (rows = samples on one plot) :return: None """ @@ -301,9 +302,10 @@ def visualize_with_bounding_boxes(orig, aug, plot_rows=3): def add_bounding_boxes(ax, bboxes): for bbox in bboxes: rect = patches.Rectangle((bbox[0], bbox[1]), - bbox[2], bbox[3], - linewidth=1, edgecolor='r', facecolor='none') + bbox[2]*0.997, bbox[3]*0.997, + linewidth=1.80, edgecolor='r', facecolor='none') # Add the patch to the Axes + # Params to Rectangle slightly modified to prevent drawing overflow ax.add_patch(rect) # Quick check to confirm correct input parameters @@ -312,14 +314,15 @@ def visualize_with_bounding_boxes(orig, aug, plot_rows=3): if len(orig) != len(aug) or not orig: return - batch_size = int(len(orig)/plot_rows) # creates batches of images to plot together + batch_size = int(len(orig) / plot_rows) # creates batches of images to plot together split_point = batch_size * plot_rows orig, aug = np.array(orig), np.array(aug) if len(orig) > plot_rows: # Create batches of required size and add remainder to last batch - orig = np.split(orig[:split_point], batch_size) + ([orig[split_point:]] if (split_point < orig.shape[0]) else []) # check to avoid empty arrays being added + orig = np.split(orig[:split_point], batch_size) + ( + [orig[split_point:]] if (split_point < orig.shape[0]) else []) # check to avoid empty arrays being added aug = np.split(aug[:split_point], batch_size) + ([aug[split_point:]] if (split_point < aug.shape[0]) else []) else: orig = [orig] @@ -334,18 +337,19 @@ def visualize_with_bounding_boxes(orig, aug, plot_rows=3): for x, (dataA, dataB) in enumerate(zip(allData[0], allData[1])): cur_ix = base_ix + x - (axA, axB) = (axs[x, 0], axs[x, 1]) if (curPlot > 1) else (axs[0], axs[1]) # select plotting axes based on number of image rows on plot - else case when 1 row + # select 
plotting axes based on number of image rows on plot - else case when 1 row + (axA, axB) = (axs[x, 0], axs[x, 1]) if (curPlot > 1) else (axs[0], axs[1]) axA.imshow(dataA["image"]) - add_bounding_boxes(axA, dataA["annotation"]) + add_bounding_boxes(axA, dataA[annot_name]) axA.title.set_text("Original" + str(cur_ix+1)) axB.imshow(dataB["image"]) - add_bounding_boxes(axB, dataB["annotation"]) + add_bounding_boxes(axB, dataB[annot_name]) axB.title.set_text("Augmented" + str(cur_ix+1)) - logger.info("Original **\n{} : {}".format(str(cur_ix+1), dataA["annotation"])) - logger.info("Augmented **\n{} : {}\n".format(str(cur_ix+1), dataB["annotation"])) + logger.info("Original **\n{} : {}".format(str(cur_ix+1), dataA[annot_name])) + logger.info("Augmented **\n{} : {}\n".format(str(cur_ix+1), dataB[annot_name])) plt.show() @@ -381,19 +385,19 @@ def check_bad_bbox(data, test_op, invalid_bbox_type, expected_error): width = img.shape[1] if invalid_bbox_type_ == InvalidBBoxType.WidthOverflow: # use box that overflows on width - return img, np.array([[0, 0, width + 1, height, 0, 0, 0]]).astype(np.uint32) + return img, np.array([[0, 0, width + 1, height, 0, 0, 0]]).astype(np.float32) if invalid_bbox_type_ == InvalidBBoxType.HeightOverflow: # use box that overflows on height - return img, np.array([[0, 0, width, height + 1, 0, 0, 0]]).astype(np.uint32) + return img, np.array([[0, 0, width, height + 1, 0, 0, 0]]).astype(np.float32) if invalid_bbox_type_ == InvalidBBoxType.NegativeXY: # use box with negative xy - return img, np.array([[-10, -10, width, height, 0, 0, 0]]).astype(np.uint32) + return img, np.array([[-10, -10, width, height, 0, 0, 0]]).astype(np.float32) if invalid_bbox_type_ == InvalidBBoxType.WrongShape: # use box that has incorrect shape - return img, np.array([[0, 0, width - 1]]).astype(np.uint32) + return img, np.array([[0, 0, width - 1]]).astype(np.float32) return img, bboxes try: diff --git a/tests/ut/python/ir/test_indexed_slices.py 
b/tests/ut/python/ir/test_indexed_slices.py index 86901830907..36dfe464cb4 100644 --- a/tests/ut/python/ir/test_indexed_slices.py +++ b/tests/ut/python/ir/test_indexed_slices.py @@ -36,6 +36,8 @@ from mindspore._checkparam import Rel from mindspore.nn import Optimizer from mindspore.nn import TrainOneStepCell, WithLossCell +context.set_context(mode=context.GRAPH_MODE, enable_sparse=True) + reduce_sum = P.ReduceSum() unsorted_segment_sum = P.UnsortedSegmentSum() transpose = P.Transpose() @@ -44,7 +46,6 @@ reshape = P.Reshape() size_op = P.Size() invert_permutation = P.InvertPermutation() logical_and = P.LogicalAnd() -context.set_context(mode=context.GRAPH_MODE, enable_sparse=True) @constexpr def _generate_shape_index(out_shape, indices_shape, axis): @@ -103,10 +104,15 @@ def get_bprop_sparse_gather_v2(self): adam_opt_for_map = C.MultitypeFuncGraph("adam_opt_for_map") @adam_opt_for_map.register("Tensor", "Tensor", "Tensor", "Tensor", "Tensor", - "Tensor", "Tensor", "Tensor", "Undetermined", "Bool") -def _update_run_op_for_map(beta1, beta2, eps, lr, weight_decay_tensor, param, m, v, gradient, decay_flag): - if gradient.is_indexed_slices(): - return gradient.values() + "Tensor", "Tensor", "Tensor", "IndexedSlices", "Bool") +def _update_run_op_for_map_indexed_slices(beta1, beta2, eps, lr, weight_decay_tensor, param, + m, v, gradient, decay_flag): + return gradient.values() + +@adam_opt_for_map.register("Tensor", "Tensor", "Tensor", "Tensor", "Tensor", + "Tensor", "Tensor", "Tensor", "Tensor", "Bool") +def _update_run_op_for_map_tensor(beta1, beta2, eps, lr, weight_decay_tensor, param, + m, v, gradient, decay_flag): op_mul = P.Mul() op_square = P.Square() op_sqrt = P.Sqrt() @@ -182,7 +188,7 @@ def test_indexed_slices_make_indexed_slices(): self.dense_shape = (3, 4) def construct(self, indices, values): ret = (IndexedSlices(indices, values, self.dense_shape),) - return ret[0].is_indexed_slices() + return ret[0] indices = Tensor([[0, 0], [1, 2]]) values = Tensor([1, 2], 
dtype=ms.float32) MakeIndexedSlices()(indices, values) @@ -209,7 +215,7 @@ def test_indexed_slices_sparse_gatherv2_grad_all(): self.network = network def construct(self, x, y): grad = grad_all(self.network)(x, y) - return grad, grad[0].is_indexed_slices(), grad[1].is_indexed_slices() + return grad, grad[0], grad[1] class SparseGatherV2(nn.Cell): def __init__(self): super(SparseGatherV2, self).__init__() @@ -233,14 +239,13 @@ def test_indexed_slices_sparse_gatherv2_grad_with_pram(): weights = self.weights grad = grad_by_list(self.network, weights)(x) x = grad[0] - return x.is_indexed_slices(), x.values(), x.indices(), x.dense_shape() + return x, x.values(), x.indices(), x.dense_shape() class SparseGatherV2(nn.Cell): def __init__(self): super(SparseGatherV2, self).__init__() self.sparse_gatherv2 = MySparseGatherV2() self.axis = 0 - self.params = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.int32)), - name="params", has_indexed_slices_grad=True) + self.params = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.int32)), name="params") def construct(self, indices): return self.sparse_gatherv2(self.params, indices, self.axis) indices = Tensor(np.array([0, 1]).astype(np.int32)) @@ -248,20 +253,6 @@ def test_indexed_slices_sparse_gatherv2_grad_with_pram(): network(indices) -def test_indexed_slices_is_indexed_slices(): - class MakeIndexedSlices(nn.Cell): - def __init__(self): - super(MakeIndexedSlices, self).__init__() - self.dense_shape = (3, 4) - def construct(self, indices, values): - indexed_slices = IndexedSlices(indices, values, self.dense_shape) - ret = indexed_slices.is_indexed_slices() - return ret - indices = Tensor([[0, 0], [1, 2]]) - values = Tensor([1, 2], dtype=ms.float32) - MakeIndexedSlices()(indices, values) - - def test_indexed_slices_env_get(): class Loss(nn.Cell): def __init__(self): @@ -271,7 +262,7 @@ def test_indexed_slices_env_get(): class NetWithSparseGatherV2(nn.Cell): def __init__(self): super(NetWithSparseGatherV2, self).__init__() - self.w1 = 
Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", has_indexed_slices_grad=True) + self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1") self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2") self.gatherv2 = MySparseGatherV2() self.axis = 0 diff --git a/tests/ut/python/model/resnet.py b/tests/ut/python/model/resnet.py new file mode 100644 index 00000000000..001e1db0cf3 --- /dev/null +++ b/tests/ut/python/model/resnet.py @@ -0,0 +1,282 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""ResNet.""" +import numpy as np +import mindspore.nn as nn +from mindspore.ops import operations as P +from mindspore.common.tensor import Tensor + + +def _weight_variable(shape, factor=0.01): + init_value = np.random.randn(*shape).astype(np.float32) * factor + return Tensor(init_value) + + +def _conv3x3(in_channel, out_channel, stride=1): + weight_shape = (out_channel, in_channel, 3, 3) + weight = _weight_variable(weight_shape) + return nn.Conv2d(in_channel, out_channel, + kernel_size=3, stride=stride, padding=0, pad_mode='same', weight_init=weight) + + +def _conv1x1(in_channel, out_channel, stride=1): + weight_shape = (out_channel, in_channel, 1, 1) + weight = _weight_variable(weight_shape) + return nn.Conv2d(in_channel, out_channel, + kernel_size=1, stride=stride, padding=0, pad_mode='same', weight_init=weight) + + +def _conv7x7(in_channel, out_channel, stride=1): + weight_shape = (out_channel, in_channel, 7, 7) + weight = _weight_variable(weight_shape) + return nn.Conv2d(in_channel, out_channel, + kernel_size=7, stride=stride, padding=0, pad_mode='same', weight_init=weight) + + +def _bn(channel): + return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9, + gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1) + + +def _bn_last(channel): + return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9, + gamma_init=0, beta_init=0, moving_mean_init=0, moving_var_init=1) + + +def _fc(in_channel, out_channel): + weight_shape = (out_channel, in_channel) + weight = _weight_variable(weight_shape) + return nn.Dense(in_channel, out_channel, has_bias=True, weight_init=weight, bias_init=0) + + +class ResidualBlock(nn.Cell): + """ + ResNet V1 residual block definition. + + Args: + in_channel (int): Input channel. + out_channel (int): Output channel. + stride (int): Stride size for the first convolutional layer. Default: 1. + + Returns: + Tensor, output tensor. 
+ + Examples: + >>> ResidualBlock(3, 256, stride=2) + """ + expansion = 4 + + def __init__(self, + in_channel, + out_channel, + stride=1): + super(ResidualBlock, self).__init__() + + channel = out_channel // self.expansion + self.conv1 = _conv1x1(in_channel, channel, stride=1) + self.bn1 = _bn(channel) + + self.conv2 = _conv3x3(channel, channel, stride=stride) + self.bn2 = _bn(channel) + + self.conv3 = _conv1x1(channel, out_channel, stride=1) + self.bn3 = _bn_last(out_channel) + + self.relu = nn.ReLU() + + self.down_sample = False + + if stride != 1 or in_channel != out_channel: + self.down_sample = True + self.down_sample_layer = None + + if self.down_sample: + self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride), + _bn(out_channel)]) + self.add = P.TensorAdd() + + def construct(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.down_sample: + identity = self.down_sample_layer(identity) + + out = self.add(out, identity) + out = self.relu(out) + + return out + + +class ResNet(nn.Cell): + """ + ResNet architecture. + + Args: + block (Cell): Block for network. + layer_nums (list): Numbers of block in different layers. + in_channels (list): Input channel in each layer. + out_channels (list): Output channel in each layer. + strides (list): Stride size in each layer. + num_classes (int): The number of classes that the training images are belonging to. + Returns: + Tensor, output tensor. 
+ + Examples: + >>> ResNet(ResidualBlock, + >>> [3, 4, 6, 3], + >>> [64, 256, 512, 1024], + >>> [256, 512, 1024, 2048], + >>> [1, 2, 2, 2], + >>> 10) + """ + + def __init__(self, + block, + layer_nums, + in_channels, + out_channels, + strides, + num_classes): + super(ResNet, self).__init__() + + if not len(layer_nums) == len(in_channels) == len(out_channels) == 4: + raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!") + + self.conv1 = _conv7x7(3, 64, stride=2) + self.bn1 = _bn(64) + self.relu = P.ReLU() + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same") + + self.layer1 = self._make_layer(block, + layer_nums[0], + in_channel=in_channels[0], + out_channel=out_channels[0], + stride=strides[0]) + self.layer2 = self._make_layer(block, + layer_nums[1], + in_channel=in_channels[1], + out_channel=out_channels[1], + stride=strides[1]) + self.layer3 = self._make_layer(block, + layer_nums[2], + in_channel=in_channels[2], + out_channel=out_channels[2], + stride=strides[2]) + self.layer4 = self._make_layer(block, + layer_nums[3], + in_channel=in_channels[3], + out_channel=out_channels[3], + stride=strides[3]) + + self.mean = P.ReduceMean(keep_dims=True) + self.flatten = nn.Flatten() + self.end_point = _fc(out_channels[3], num_classes) + + def _make_layer(self, block, layer_num, in_channel, out_channel, stride): + """ + Make stage network of ResNet. + + Args: + block (Cell): Resnet block. + layer_num (int): Layer number. + in_channel (int): Input channel. + out_channel (int): Output channel. + stride (int): Stride size for the first convolutional layer. + + Returns: + SequentialCell, the output layer. 
+ + Examples: + >>> _make_layer(ResidualBlock, 3, 128, 256, 2) + """ + layers = [] + + resnet_block = block(in_channel, out_channel, stride=stride) + layers.append(resnet_block) + + for _ in range(1, layer_num): + resnet_block = block(out_channel, out_channel, stride=1) + layers.append(resnet_block) + + return nn.SequentialCell(layers) + + def construct(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + c1 = self.maxpool(x) + + c2 = self.layer1(c1) + c3 = self.layer2(c2) + c4 = self.layer3(c3) + c5 = self.layer4(c4) + + out = self.mean(c5, (2, 3)) + out = self.flatten(out) + out = self.end_point(out) + + return out + + +def resnet50(class_num=10): + """ + Get ResNet50 neural network. + + Args: + class_num (int): Class number. + + Returns: + Cell, cell instance of ResNet50 neural network. + + Examples: + >>> net = resnet50(10) + """ + return ResNet(ResidualBlock, + [3, 4, 6, 3], + [64, 256, 512, 1024], + [256, 512, 1024, 2048], + [1, 2, 2, 2], + class_num) + +def resnet101(class_num=1001): + """ + Get ResNet101 neural network. + + Args: + class_num (int): Class number. + + Returns: + Cell, cell instance of ResNet101 neural network. 
+ + Examples: + >>> net = resnet101(1001) + """ + return ResNet(ResidualBlock, + [3, 4, 23, 3], + [64, 256, 512, 1024], + [256, 512, 1024, 2048], + [1, 2, 2, 2], + class_num) diff --git a/tests/ut/python/model/test_mix_precision.py b/tests/ut/python/model/test_mix_precision.py index d0e77f901ad..f1fc2cc2f71 100644 --- a/tests/ut/python/model/test_mix_precision.py +++ b/tests/ut/python/model/test_mix_precision.py @@ -219,3 +219,31 @@ def test_dict_cast(): y = Tensor(np.array([4, 5.5, 6.5]), mstype.float32) net = FirstNet() net(x, y) + + +def test_kwarg_cast(): + class FirstNet(nn.Cell): + def __init__(self): + super(FirstNet, self).__init__() + self.net = SecondNet().add_flags_recursive(fp16=True) + self.add = P.TensorAdd() + + def construct(self, tensor_a, tensor_b): + tensor_c = self.add(tensor_a, tensor_b) + dictionary = {"key": tensor_a} + result = self.net(key1=tensor_c, key2=dictionary) + return result + + class SecondNet(nn.Cell): + def __init__(self): + super(SecondNet, self).__init__() + self.add = P.TensorAdd() + + def construct(self, key1=1, key2=2): + tensor_d = self.add(key1, key2["key"]) + return tensor_d + + x = Tensor(np.array([1, 2.5, 3.5]), mstype.float32) + y = Tensor(np.array([4, 5.5, 6.5]), mstype.float32) + net = FirstNet() + net(x, y) diff --git a/tests/ut/python/nn/optim/test_adam.py b/tests/ut/python/nn/optim/test_adam.py index b435bf65b95..03a73893c50 100644 --- a/tests/ut/python/nn/optim/test_adam.py +++ b/tests/ut/python/nn/optim/test_adam.py @@ -17,12 +17,13 @@ import numpy as np import pytest import mindspore.nn as nn -from mindspore import Tensor, Parameter +from mindspore import Tensor, Parameter, context from mindspore.common.api import _executor from mindspore.nn import TrainOneStepCell, WithLossCell from mindspore.nn.optim import Adam, AdamWeightDecay, AdamWeightDecayDynamicLR from mindspore.ops import operations as P +context.set_context(enable_sparse=True) class Net(nn.Cell): """ Net definition """ @@ -53,8 +54,7 @@ class 
NetWithSparseGatherV2(nn.Cell): """ NetWithSparseGatherV2 definition """ def __init__(self): super(NetWithSparseGatherV2, self).__init__() - self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), - name="weight1", sparse_grad="sparse_key_w1") + self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1") self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2") self.axis = 0 self.gather = P.SparseGatherV2() diff --git a/tests/ut/python/nn/optim/test_adam_with_tuple_grad.py b/tests/ut/python/nn/optim/test_adam_with_tuple_grad.py index 7f9f341a931..23aad24c475 100644 --- a/tests/ut/python/nn/optim/test_adam_with_tuple_grad.py +++ b/tests/ut/python/nn/optim/test_adam_with_tuple_grad.py @@ -27,6 +27,7 @@ from mindspore.ops import functional as F from mindspore._checkparam import Validator as validator from mindspore._checkparam import Rel +context.set_context(enable_sparse=True) adam_opt_for_map = C.MultitypeFuncGraph("adam_opt_for_map") @adam_opt_for_map.register("Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", @@ -154,7 +155,7 @@ def test_AdamWeightDecaySparse(): class NetWithSparseGatherV2(nn.Cell): def __init__(self): super(NetWithSparseGatherV2, self).__init__() - self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", sparse_grad="sparse_key_w1") + self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1") self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2") self.gatherv2 = P.SparseGatherV2() self.axis = 0 diff --git a/tests/ut/python/nn/optim/test_ftrl.py b/tests/ut/python/nn/optim/test_ftrl.py index de59dfdbad6..670bebc92d0 100644 --- a/tests/ut/python/nn/optim/test_ftrl.py +++ b/tests/ut/python/nn/optim/test_ftrl.py @@ -17,12 +17,13 @@ import numpy as np import mindspore.nn as nn -from mindspore import Tensor, Parameter +from mindspore import Tensor, Parameter, context from mindspore.common.api import 
_executor from mindspore.nn import TrainOneStepCell, WithLossCell from mindspore.nn.optim import FTRL from mindspore.ops import operations as P +context.set_context(enable_sparse=True) class Net(nn.Cell): def __init__(self): @@ -41,8 +42,7 @@ class NetWithSparseGatherV2(nn.Cell): """ NetWithSparseGatherV2 definition """ def __init__(self): super(NetWithSparseGatherV2, self).__init__() - self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), - name="weight1", sparse_grad="sparse_key_w1") + self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1") self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2") self.axis = 0 self.gather = P.SparseGatherV2() diff --git a/tests/ut/python/nn/optim/test_lazyadam.py b/tests/ut/python/nn/optim/test_lazyadam.py index ce66b404e27..77695971406 100644 --- a/tests/ut/python/nn/optim/test_lazyadam.py +++ b/tests/ut/python/nn/optim/test_lazyadam.py @@ -17,12 +17,13 @@ import numpy as np import pytest import mindspore.nn as nn -from mindspore import Tensor, Parameter +from mindspore import Tensor, Parameter, context from mindspore.common.api import _executor from mindspore.nn import TrainOneStepCell, WithLossCell from mindspore.nn.optim import LazyAdam from mindspore.ops import operations as P +context.set_context(enable_sparse=True) class Net(nn.Cell): """ Net definition """ @@ -43,8 +44,7 @@ class NetWithSparseGatherV2(nn.Cell): """ NetWithSparseGatherV2 definition """ def __init__(self): super(NetWithSparseGatherV2, self).__init__() - self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), - name="weight1", sparse_grad="sparse_key_w1") + self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1") self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2") self.axis = 0 self.gather = P.SparseGatherV2() diff --git a/tests/ut/python/nn/optim/test_proximal_ada_grad.py 
b/tests/ut/python/nn/optim/test_proximal_ada_grad.py index c7e6d3f88a7..3077896fed5 100644 --- a/tests/ut/python/nn/optim/test_proximal_ada_grad.py +++ b/tests/ut/python/nn/optim/test_proximal_ada_grad.py @@ -17,12 +17,13 @@ import numpy as np import mindspore.nn as nn -from mindspore import Tensor, Parameter +from mindspore import Tensor, Parameter, context from mindspore.common.api import _executor from mindspore.nn import TrainOneStepCell, WithLossCell from mindspore.nn.optim import ProximalAdagrad from mindspore.ops import operations as P +context.set_context(enable_sparse=True) class Net(nn.Cell): def __init__(self): @@ -40,8 +41,7 @@ class NetWithSparseGatherV2(nn.Cell): """ NetWithSparseGatherV2 definition """ def __init__(self): super(NetWithSparseGatherV2, self).__init__() - self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", - sparse_grad="sparse_key_w1") + self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1") self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="weight2") self.axis = 0 self.gather = P.SparseGatherV2() diff --git a/tests/ut/python/nn/test_distribution.py b/tests/ut/python/nn/test_distribution.py new file mode 100644 index 00000000000..845c64a1106 --- /dev/null +++ b/tests/ut/python/nn/test_distribution.py @@ -0,0 +1,369 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +""" +Test nn.Distribution. + +Including Normal Distribution and Bernoulli Distribution. +""" +import pytest +import numpy as np + +import mindspore.nn as nn +from mindspore import dtype +from mindspore import Tensor + +def test_normal_shape_errpr(): + """ + Invalid shapes. + """ + with pytest.raises(ValueError): + nn.Normal([[2.], [1.]], [[2.], [3.], [4.]], dtype=dtype.float32) + +def test_no_arguments(): + """ + No args passed in during initialization. + """ + n = nn.Normal() + assert isinstance(n, nn.Distribution) + b = nn.Bernoulli() + assert isinstance(b, nn.Distribution) + +def test_with_arguments(): + """ + Args passed in during initialization. + """ + n = nn.Normal([3.0], [4.0], dtype=dtype.float32) + assert isinstance(n, nn.Distribution) + b = nn.Bernoulli([0.3, 0.5], dtype=dtype.int32) + assert isinstance(b, nn.Distribution) + +class NormalProb(nn.Cell): + """ + Normal distribution: initialize with mean/sd. + """ + def __init__(self): + super(NormalProb, self).__init__() + self.normal = nn.Normal(3.0, 4.0, dtype=dtype.float32) + + def construct(self, value): + x = self.normal('prob', value) + y = self.normal('log_prob', value) + return x, y + +def test_normal_prob(): + """ + Test pdf/log_pdf: passing value through construct. + """ + net = NormalProb() + value = Tensor([0.5, 1.0], dtype=dtype.float32) + pdf, log_pdf = net(value) + assert isinstance(pdf, Tensor) + assert isinstance(log_pdf, Tensor) + +class NormalProb1(nn.Cell): + """ + Normal distribution: initialize without mean/sd. + """ + def __init__(self): + super(NormalProb1, self).__init__() + self.normal = nn.Normal() + + def construct(self, value, mean, sd): + x = self.normal('prob', value, mean, sd) + y = self.normal('log_prob', value, mean, sd) + return x, y + +def test_normal_prob1(): + """ + Test pdf/logpdf: passing mean/sd, value through construct. 
+ """ + net = NormalProb1() + value = Tensor([0.5, 1.0], dtype=dtype.float32) + mean = Tensor([0.0], dtype=dtype.float32) + sd = Tensor([1.0], dtype=dtype.float32) + pdf, log_pdf = net(value, mean, sd) + assert isinstance(pdf, Tensor) + assert isinstance(log_pdf, Tensor) + +class NormalProb2(nn.Cell): + """ + Normal distribution: initialize with mean/sd. + """ + def __init__(self): + super(NormalProb2, self).__init__() + self.normal = nn.Normal(3.0, 4.0, dtype=dtype.float32) + + def construct(self, value, mean, sd): + x = self.normal('prob', value, mean, sd) + y = self.normal('log_prob', value, mean, sd) + return x, y + +def test_normal_prob2(): + """ + Test pdf/log_pdf: passing mean/sd through construct. + Overwrite original mean/sd. + """ + net = NormalProb2() + value = Tensor([0.5, 1.0], dtype=dtype.float32) + mean = Tensor([0.0], dtype=dtype.float32) + sd = Tensor([1.0], dtype=dtype.float32) + pdf, log_pdf = net(value, mean, sd) + assert isinstance(pdf, Tensor) + assert isinstance(log_pdf, Tensor) + +class BernoulliProb(nn.Cell): + """ + Bernoulli distribution: initialize with probs. + """ + def __init__(self): + super(BernoulliProb, self).__init__() + self.bernoulli = nn.Bernoulli(0.5, dtype=dtype.int32) + + def construct(self, value): + return self.bernoulli('prob', value) + +class BernoulliLogProb(nn.Cell): + """ + Bernoulli distribution: initialize with probs. + """ + def __init__(self): + super(BernoulliLogProb, self).__init__() + self.bernoulli = nn.Bernoulli(0.5, dtype=dtype.int32) + + def construct(self, value): + return self.bernoulli('log_prob', value) + + +def test_bernoulli_prob(): + """ + Test pmf/log_pmf: passing value through construct. + """ + net = BernoulliProb() + value = Tensor([1, 0, 1, 0, 1], dtype=dtype.float32) + pmf = net(value) + assert isinstance(pmf, Tensor) + +def test_bernoulli_log_prob(): + """ + Test pmf/log_pmf: passing value through construct. 
+ """ + net = BernoulliLogProb() + value = Tensor([1, 0, 1, 0, 1], dtype=dtype.float32) + log_pmf = net(value) + assert isinstance(log_pmf, Tensor) + +class BernoulliProb1(nn.Cell): + """ + Bernoulli distribution: initialize without probs. + """ + def __init__(self): + super(BernoulliProb1, self).__init__() + self.bernoulli = nn.Bernoulli() + + def construct(self, value, probs): + return self.bernoulli('prob', value, probs) + +class BernoulliLogProb1(nn.Cell): + """ + Bernoulli distribution: initialize without probs. + """ + def __init__(self): + super(BernoulliLogProb1, self).__init__() + self.bernoulli = nn.Bernoulli() + + def construct(self, value, probs): + return self.bernoulli('log_prob', value, probs) + + +def test_bernoulli_prob1(): + """ + Test pmf/log_pmf: passing probs through construct. + """ + net = BernoulliProb1() + value = Tensor([1, 0, 1, 0, 1], dtype=dtype.float32) + probs = Tensor([0.3], dtype=dtype.float32) + pmf = net(value, probs) + assert isinstance(pmf, Tensor) + +def test_bernoulli_log_prob1(): + """ + Test pmf/log_pmf: passing probs through construct. + """ + net = BernoulliLogProb1() + value = Tensor([1, 0, 1, 0, 1], dtype=dtype.float32) + probs = Tensor([0.3], dtype=dtype.float32) + log_pmf = net(value, probs) + assert isinstance(log_pmf, Tensor) + +class BernoulliProb2(nn.Cell): + """ + Bernoulli distribution: initialize with probs. + """ + def __init__(self): + super(BernoulliProb2, self).__init__() + self.bernoulli = nn.Bernoulli(0.5) + + def construct(self, value, probs): + return self.bernoulli('prob', value, probs) + +class BernoulliLogProb2(nn.Cell): + """ + Bernoulli distribution: initialize with probs. + """ + def __init__(self): + super(BernoulliLogProb2, self).__init__() + self.bernoulli = nn.Bernoulli(0.5) + + def construct(self, value, probs): + return self.bernoulli('log_prob', value, probs) + + +def test_bernoulli_prob2(): + """ + Test pmf/log_pmf: passing probs/value through construct. + Overwrite original probs. 
+ """ + net = BernoulliProb2() + value = Tensor([1, 0, 1, 0, 1], dtype=dtype.float32) + probs = Tensor([0.3], dtype=dtype.float32) + pmf = net(value, probs) + assert isinstance(pmf, Tensor) + +def test_bernoulli_log_prob2(): + """ + Test pmf/log_pmf: passing probs/value through construct. + Overwrite original probs. + """ + net = BernoulliLogProb2() + value = Tensor([1, 0, 1, 0, 1], dtype=dtype.float32) + probs = Tensor([0.3], dtype=dtype.float32) + log_pmf = net(value, probs) + assert isinstance(log_pmf, Tensor) + + +class NormalKl(nn.Cell): + """ + Test class: kl_loss of Normal distribution. + """ + def __init__(self): + super(NormalKl, self).__init__() + self.n = nn.Normal(np.array([3.0]), np.array([4.0]), dtype=dtype.float32) + + def construct(self, x_, y_): + return self.n('kl_loss', 'Normal', x_, y_) + +class BernoulliKl(nn.Cell): + """ + Test class: kl_loss between Bernoulli distributions. + """ + def __init__(self): + super(BernoulliKl, self).__init__() + self.b = nn.Bernoulli(0.7, dtype=dtype.int32) + + def construct(self, x_): + return self.b('kl_loss', 'Bernoulli', x_) + +def test_kl(): + """ + Test kl_loss function. + """ + nor_net = NormalKl() + mean_b = np.array([1.0]).astype(np.float32) + sd_b = np.array([1.0]).astype(np.float32) + mean = Tensor(mean_b, dtype=dtype.float32) + sd = Tensor(sd_b, dtype=dtype.float32) + loss = nor_net(mean, sd) + assert isinstance(loss, Tensor) + + ber_net = BernoulliKl() + probs_b = Tensor([0.3], dtype=dtype.float32) + loss = ber_net(probs_b) + assert isinstance(loss, Tensor) + + +class NormalKlNoArgs(nn.Cell): + """ + Test class: kl_loss of Normal distribution. + No args during initialization. + """ + def __init__(self): + super(NormalKlNoArgs, self).__init__() + self.n = nn.Normal(dtype=dtype.float32) + + def construct(self, x_, y_, w_, v_): + return self.n('kl_loss', 'Normal', x_, y_, w_, v_) + +class BernoulliKlNoArgs(nn.Cell): + """ + Test class: kl_loss between Bernoulli distributions. 
+ No args during initialization. + """ + def __init__(self): + super(BernoulliKlNoArgs, self).__init__() + self.b = nn.Bernoulli(dtype=dtype.int32) + + def construct(self, x_, y_): + return self.b('kl_loss', 'Bernoulli', x_, y_) + +def test_kl_no_args(): + """ + Test kl_loss function. + """ + nor_net = NormalKlNoArgs() + mean_b = np.array([1.0]).astype(np.float32) + sd_b = np.array([1.0]).astype(np.float32) + mean_a = np.array([2.0]).astype(np.float32) + sd_a = np.array([3.0]).astype(np.float32) + mean_b = Tensor(mean_b, dtype=dtype.float32) + sd_b = Tensor(sd_b, dtype=dtype.float32) + mean_a = Tensor(mean_a, dtype=dtype.float32) + sd_a = Tensor(sd_a, dtype=dtype.float32) + loss = nor_net(mean_b, sd_b, mean_a, sd_a) + assert isinstance(loss, Tensor) + + ber_net = BernoulliKlNoArgs() + probs_b = Tensor([0.3], dtype=dtype.float32) + probs_a = Tensor([0.7], dtype=dtype.float32) + loss = ber_net(probs_b, probs_a) + assert isinstance(loss, Tensor) + + + +class NormalBernoulli(nn.Cell): + """ + Test class: basic mean/sd function. + """ + def __init__(self): + super(NormalBernoulli, self).__init__() + self.n = nn.Normal(3.0, 4.0, dtype=dtype.float32) + self.b = nn.Bernoulli(0.5, dtype=dtype.int32) + + def construct(self): + normal_mean = self.n('mean') + normal_sd = self.n('sd') + bernoulli_mean = self.b('mean') + bernoulli_sd = self.b('sd') + return normal_mean, normal_sd, bernoulli_mean, bernoulli_sd + +def test_bascis(): + """ + Test mean/sd functionality of Normal and Bernoulli. 
+ """ + net = NormalBernoulli() + normal_mean, normal_sd, bernoulli_mean, bernoulli_sd = net() + assert isinstance(normal_mean, Tensor) + assert isinstance(normal_sd, Tensor) + assert isinstance(bernoulli_mean, Tensor) + assert isinstance(bernoulli_sd, Tensor) diff --git a/tests/ut/python/nn/test_msssim.py b/tests/ut/python/nn/test_msssim.py new file mode 100644 index 00000000000..b85d13c927e --- /dev/null +++ b/tests/ut/python/nn/test_msssim.py @@ -0,0 +1,135 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +""" +test msssim +""" +import numpy as np +import pytest + +import mindspore.common.dtype as mstype +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.common.api import _executor + +_MSSSIM_WEIGHTS = (0.0448, 0.2856, 0.3001, 0.2363, 0.1333) + +class MSSSIMNet(nn.Cell): + def __init__(self, max_val=1.0, power_factors=_MSSSIM_WEIGHTS, filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03): + super(MSSSIMNet, self).__init__() + self.net = nn.MSSSIM(max_val, power_factors, filter_size, filter_sigma, k1, k2) + + def construct(self, img1, img2): + return self.net(img1, img2) + + +def test_compile(): + factors = (0.033, 0.033, 0.033) + net = MSSSIMNet(power_factors=factors) + img1 = Tensor(np.random.random((8, 3, 128, 128))) + img2 = Tensor(np.random.random((8, 3, 128, 128))) + _executor.compile(net, img1, img2) + + +def test_compile_grayscale(): + max_val = 255 + factors = (0.033, 0.033, 0.033) + net = MSSSIMNet(max_val=max_val, power_factors=factors) + img1 = Tensor(np.random.randint(0, 256, (8, 3, 128, 128), np.uint8)) + img2 = Tensor(np.random.randint(0, 256, (8, 3, 128, 128), np.uint8)) + _executor.compile(net, img1, img2) + + +def test_msssim_max_val_negative(): + max_val = -1 + with pytest.raises(ValueError): + _ = MSSSIMNet(max_val) + + +def test_msssim_max_val_bool(): + max_val = True + with pytest.raises(TypeError): + _ = MSSSIMNet(max_val) + + +def test_msssim_max_val_zero(): + max_val = 0 + with pytest.raises(ValueError): + _ = MSSSIMNet(max_val) + + +def test_msssim_power_factors_set(): + with pytest.raises(TypeError): + _ = MSSSIMNet(power_factors={0.033, 0.033, 0.033}) + + +def test_msssim_filter_size_float(): + with pytest.raises(TypeError): + _ = MSSSIMNet(filter_size=1.1) + + +def test_msssim_filter_size_zero(): + with pytest.raises(ValueError): + _ = MSSSIMNet(filter_size=0) + + +def test_msssim_filter_sigma_zero(): + with pytest.raises(ValueError): + _ = 
MSSSIMNet(filter_sigma=0.0) + + +def test_msssim_filter_sigma_negative(): + with pytest.raises(ValueError): + _ = MSSSIMNet(filter_sigma=-0.1) + + +def test_msssim_different_shape(): + shape_1 = (8, 3, 128, 128) + shape_2 = (8, 3, 256, 256) + factors = (0.033, 0.033, 0.033) + img1 = Tensor(np.random.random(shape_1)) + img2 = Tensor(np.random.random(shape_2)) + net = MSSSIMNet(power_factors=factors) + with pytest.raises(ValueError): + _executor.compile(net, img1, img2) + + +def test_msssim_different_dtype(): + dtype_1 = mstype.float32 + dtype_2 = mstype.float16 + factors = (0.033, 0.033, 0.033) + img1 = Tensor(np.random.random((8, 3, 128, 128)), dtype=dtype_1) + img2 = Tensor(np.random.random((8, 3, 128, 128)), dtype=dtype_2) + net = MSSSIMNet(power_factors=factors) + with pytest.raises(TypeError): + _executor.compile(net, img1, img2) + + +def test_msssim_invalid_5d_input(): + shape_1 = (8, 3, 128, 128) + shape_2 = (8, 3, 256, 256) + invalid_shape = (8, 3, 128, 128, 1) + factors = (0.033, 0.033, 0.033) + img1 = Tensor(np.random.random(shape_1)) + invalid_img1 = Tensor(np.random.random(invalid_shape)) + img2 = Tensor(np.random.random(shape_2)) + invalid_img2 = Tensor(np.random.random(invalid_shape)) + + net = MSSSIMNet(power_factors=factors) + with pytest.raises(ValueError): + _executor.compile(net, invalid_img1, img2) + with pytest.raises(ValueError): + _executor.compile(net, img1, invalid_img2) + with pytest.raises(ValueError): + _executor.compile(net, invalid_img1, invalid_img2) diff --git a/tests/ut/python/nn/test_ssim.py b/tests/ut/python/nn/test_ssim.py index 5cf1b0c94c4..8b7e4410141 100644 --- a/tests/ut/python/nn/test_ssim.py +++ b/tests/ut/python/nn/test_ssim.py @@ -78,26 +78,6 @@ def test_ssim_filter_sigma_negative(): _ = SSIMNet(filter_sigma=-0.1) -def test_ssim_k1_k2_wrong_value(): - with pytest.raises(ValueError): - _ = SSIMNet(k1=1.1) - with pytest.raises(ValueError): - _ = SSIMNet(k1=1.0) - with pytest.raises(ValueError): - _ = SSIMNet(k1=0.0) - with 
pytest.raises(ValueError): - _ = SSIMNet(k1=-1.0) - - with pytest.raises(ValueError): - _ = SSIMNet(k2=1.1) - with pytest.raises(ValueError): - _ = SSIMNet(k2=1.0) - with pytest.raises(ValueError): - _ = SSIMNet(k2=0.0) - with pytest.raises(ValueError): - _ = SSIMNet(k2=-1.0) - - def test_ssim_different_shape(): shape_1 = (8, 3, 16, 16) shape_2 = (8, 3, 8, 8) diff --git a/tests/ut/python/ops/test_control_ops.py b/tests/ut/python/ops/test_control_ops.py index 064512b19a0..53b42b8f669 100644 --- a/tests/ut/python/ops/test_control_ops.py +++ b/tests/ut/python/ops/test_control_ops.py @@ -600,3 +600,42 @@ def test_while_tensor(): x = Tensor(np.ones([6, 8, 10], np.int32)) y = Tensor(np.ones([6, 8, 10], np.int32)) out = net(x, y) + + +def test_large_for_loop(): + class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.flatten = P.ReLU() #nn.Flatten() + + def construct(self, x): + for elem in range(1, 19000): + x = self.flatten(x + elem) + return x + + t = Tensor(np.ones([2, 3], dtype=np.float32)) + net = Net() + net(t) + + +def test_large_for_loop_with_continue_break(): + class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.flatten = P.ReLU() #nn.Flatten() + + def construct(self, x): + idx = 0 + for elem1 in range(200): + idx = idx + 1 + if idx < 10: + x = x + 0.5 + continue + if idx > 500: + break + x = self.flatten(x + elem1) + return x + + t = Tensor(np.ones([2, 3], dtype=np.float32)) + net = Net() + net(t) diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py index 029d49fe1cb..31ca540f740 100755 --- a/tests/ut/python/ops/test_ops.py +++ b/tests/ut/python/ops/test_ops.py @@ -649,6 +649,15 @@ def test_strided_slice_const(): assert (ret.asnumpy() == np.array([], np.float32).reshape([0, 1, 7, 8, 9, 3, 1])).all() +class ParallelConcatNet(nn.Cell): + def __init__(self): + super(ParallelConcatNet, self).__init__() + self.parallel_concat = P.ParallelConcat() + + def construct(self, x1, x2): + return 
self.parallel_concat((x1, x2)) + + test_case_math_ops = [ ('BitwiseAnd', { 'block': P.BitwiseAnd(), @@ -1391,6 +1400,11 @@ test_case_nn_ops = [ 'desc_const': [4], 'desc_inputs': [[3, 2, 1, 3], Tensor(np.array([1, 2, 3]).astype(np.int32))], 'desc_bprop': [[4, 2, 1, 3]]}), + ('UnsortedSegmentProd', { + 'block': P.UnsortedSegmentProd(), + 'desc_const': [4], + 'desc_inputs': [[3, 2, 1, 3], Tensor(np.array([0, 1, 0]).astype(np.int32))], + 'desc_bprop': [[4, 2, 1, 3]]}), ('DropoutGenMask', { 'block': P.DropoutGenMask(), 'desc_const': [(2, 2), Tensor(0.5, mstype.float32)], @@ -1948,6 +1962,12 @@ test_case_array_ops = [ 'desc_inputs': [[1, 3, 24, 24]], 'desc_bprop': [[1, 12, 24, 24]], }), + ('ParallelConcat', { + 'block': ParallelConcatNet(), + 'desc_inputs': [Tensor([[1, 2]], mstype.float32), + Tensor([[5, 6]], mstype.float32)], + 'skip': ['backward'], + }), ] test_case_other_ops = [ @@ -2216,7 +2236,10 @@ test_case_other_ops = [ 'desc_inputs': [Tensor(np.array([1.1]).astype(np.float32)), Tensor(np.array([1.2]).astype(np.float32))], 'skip': ['backward']}), - + ('PopulationCount', { + 'block': P.PopulationCount(), + 'desc_inputs': [Tensor(np.array([1, 2, 3]).astype(np.int16))], + 'skip': ['backward']}), ] test_case_quant_ops = [ diff --git a/tests/ut/python/optimizer/test_python_pass.py b/tests/ut/python/optimizer/test_python_pass.py new file mode 100644 index 00000000000..c3ce3d6c4e3 --- /dev/null +++ b/tests/ut/python/optimizer/test_python_pass.py @@ -0,0 +1,64 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import numpy as np + +import mindspore +import mindspore.nn as nn +from mindspore import context +from mindspore.common.tensor import Tensor +from mindspore.ops import operations as P +from mindspore.common.python_pass_register import registe_pass, PyPassManager +from mindspore.common.api import _generate_pip_args +from mindspore._c_expression import generate_key, Executor_ + +context.set_context(mode=context.GRAPH_MODE) + +def get_func_graph(obj, *args, phase="predict"): + args_names, args_list = _generate_pip_args(obj, *args) + dic = dict(zip(args_names, args_list)) + key = generate_key(phase, dic) + phase_prefix = str(key[1]) + if phase == 'export': + phase = phase + '.' + phase_prefix + '.' + str(obj.create_time) + else: + phase = phase_prefix + phase + '.' + str(obj.create_time) + _executor = Executor_.get_instance() + _executor.compile(obj, args_list, phase, False) + return _executor.get_func_graph(phase) + +def test_softmax_relu(): + """ + Use python pass to transform from Softmax to ReLU. 
+ """ + inputs = Tensor(np.ones([42]), mindspore.float16) + softmax_model = nn.Softmax() + + @registe_pass(run_only_once=True) + def softmax_relu_pass(): + softmax = P.Softmax() + relu = P.ReLU() + def pattern(x): + x = softmax(x) + return x + def target(x): + x = relu(x) + return x + return pattern, target + + transformed_repr = get_func_graph(softmax_model, inputs).get_return().expanded_str(2) + ppm = PyPassManager() + ppm.unregiste(softmax_relu_pass) + assert "ReLU" in transformed_repr + assert "Softmax" not in transformed_repr diff --git a/tests/ut/python/parallel/test_embeddinglookup.py b/tests/ut/python/parallel/test_embeddinglookup.py index 4ab5f5f8788..db84ab26eb3 100644 --- a/tests/ut/python/parallel/test_embeddinglookup.py +++ b/tests/ut/python/parallel/test_embeddinglookup.py @@ -19,7 +19,6 @@ import mindspore.nn as nn from mindspore.common.api import _executor from mindspore.ops import operations as P from mindspore.ops import composite as C -from mindspore.ops.operations import _inner_ops as inner from mindspore import Tensor, context from tests.ut.python.ops.test_math_ops import VirtualLoss @@ -42,17 +41,15 @@ class NetWithLoss(nn.Cell): return self.loss(predict) class Net(nn.Cell): - def __init__(self, shape, offset, reduce_scatter_flag, split_num): + def __init__(self, shape, offset, strategy1=None, strategy2=None, target="Device"): super().__init__() self.index = Tensor(np.ones(shape), dtype=ms.int32) self.offset = offset - self.reduce_scatter_flag = reduce_scatter_flag - self.split_num = split_num - self.elu = inner.EmbeddingLookup() - self.mm = P.BatchMatMul() + self.elu = P.EmbeddingLookup().set_strategy(strategy1).add_prim_attr("primitive_target", target) + self.mm = P.BatchMatMul().set_strategy(strategy2) def construct(self, x, y): - out = self.elu(x, self.index, self.offset, self.reduce_scatter_flag, self.split_num) + out = self.elu(x, self.index, self.offset) out = self.mm(out, y) return out @@ -60,9 +57,7 @@ class Net(nn.Cell): def 
test_embeddinglookup_reducescatter_false(): shape = [8, 8] offset = 8 - reduce_scatter_flag = False - split_num = 1 - net = NetWithLoss(Net(shape, offset, reduce_scatter_flag, split_num)) + net = NetWithLoss(Net(shape, offset)) net.set_auto_parallel() x = Tensor(np.ones([64, 32]), dtype=ms.float32) @@ -71,11 +66,9 @@ def test_embeddinglookup_reducescatter_false(): def test_embeddinglookup_reducescatter_true(): - shape = [64, 8] + shape = [8, 8] offset = 8 - reduce_scatter_flag = True - split_num = 8 - net = NetWithLoss(Net(shape, offset, reduce_scatter_flag, split_num)) + net = NetWithLoss(Net(shape, offset)) net.set_auto_parallel() x = Tensor(np.ones([64, 32]), dtype=ms.float32) @@ -86,9 +79,7 @@ def test_embeddinglookup_reducescatter_true(): def test_embeddinglookup_reducescatter_false_grad(): shape = [8, 8] offset = 8 - reduce_scatter_flag = False - split_num = 1 - net = GradWrap(NetWithLoss(Net(shape, offset, reduce_scatter_flag, split_num))) + net = GradWrap(NetWithLoss(Net(shape, offset))) net.set_auto_parallel() x = Tensor(np.ones([64, 32]), dtype=ms.float32) @@ -98,13 +89,39 @@ def test_embeddinglookup_reducescatter_false_grad(): def test_embeddinglookup_reducescatter_true_grad(): context.set_context(save_graphs=True) - shape = [64, 8] + shape = [8, 8] offset = 8 - reduce_scatter_flag = True - split_num = 8 - net = GradWrap(NetWithLoss(Net(shape, offset, reduce_scatter_flag, split_num))) + net = GradWrap(NetWithLoss(Net(shape, offset))) net.set_auto_parallel() x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([8, 32, 8]), dtype=ms.float32) _executor.compile(net, x, y) + + +def test_embeddinglookup_semi_auto1(): + context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel") + shape = [64, 32] + offset = 0 + strategy1 = ((8, 1), (1, 1)) + strategy2 = ((4, 1, 2), (4, 2, 1)) + net = GradWrap(NetWithLoss(Net(shape, offset, strategy1, strategy2, "CPU"))) + + net.set_auto_parallel() + x = Tensor(np.ones([64, 
64]), dtype=ms.float32) + y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + _executor.compile(net, x, y) + + +def test_embeddinglookup_semi_auto2(): + context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel") + shape = [64, 32] + offset = 0 + strategy1 = ((1, 8), (1, 1)) + strategy2 = ((4, 1, 2), (4, 2, 1)) + net = GradWrap(NetWithLoss(Net(shape, offset, strategy1, strategy2, "CPU"))) + + net.set_auto_parallel() + x = Tensor(np.ones([64, 64]), dtype=ms.float32) + y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) + _executor.compile(net, x, y) diff --git a/tests/ut/python/parallel/test_gather_v2.py b/tests/ut/python/parallel/test_gather_v2.py index 5d52089cbec..2e853875bf6 100644 --- a/tests/ut/python/parallel/test_gather_v2.py +++ b/tests/ut/python/parallel/test_gather_v2.py @@ -13,7 +13,6 @@ # limitations under the License. # ============================================================================ import numpy as np - import mindspore as ms import mindspore.nn as nn from mindspore import Tensor @@ -182,39 +181,3 @@ def test_gatherv2_auto1(): x = Tensor(np.ones([64, 32]), dtype=ms.float32) y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) _executor.compile(net, x, y) - - -def test_gatherv2_cpu0(): - context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel") - strategy1 = ((8, 1), (1, 1)) - strategy2 = ((4, 2, 1), (4, 2, 1)) - net = NetWithLoss(Net(0, strategy1, strategy2, None, "CPU")) - net.set_auto_parallel() - - x = Tensor(np.ones([64, 64]), dtype=ms.float32) - y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) - _executor.compile(net, x, y) - - -def test_gatherv2_cpu1(): - context.set_auto_parallel_context(device_num=16, global_rank=0, parallel_mode="semi_auto_parallel") - strategy1 = ((16, 1), (1, 1)) - strategy2 = ((4, 2, 1), (4, 2, 1)) - net = NetWithLoss(Net(0, strategy1, strategy2, None, "CPU")) - net.set_auto_parallel() - - x = Tensor(np.ones([64, 64]), 
dtype=ms.float32) - y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) - _executor.compile(net, x, y) - - -def test_gatherv2_cpu2(): - context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel") - strategy1 = ((1, 8), (1, 1)) - strategy2 = ((4, 2, 1), (4, 2, 1)) - net = NetWithLoss(Net(0, strategy1, strategy2, None, "CPU")) - net.set_auto_parallel() - - x = Tensor(np.ones([64, 64]), dtype=ms.float32) - y = Tensor(np.ones([64, 64, 64]), dtype=ms.float32) - _executor.compile(net, x, y) diff --git a/tests/ut/python/parallel/test_manual_gatherv2.py b/tests/ut/python/parallel/test_manual_gatherv2.py new file mode 100644 index 00000000000..21d25ae720a --- /dev/null +++ b/tests/ut/python/parallel/test_manual_gatherv2.py @@ -0,0 +1,61 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import numpy as np +import mindspore as ms +from mindspore import context, Tensor, Parameter +from mindspore.common.api import _executor +from mindspore.nn import Cell, TrainOneStepCell, Momentum +from mindspore.ops import operations as P +from mindspore.common.initializer import initializer + +class Net(Cell): + def __init__(self, strategy1=None, strategy2=None, strategy3=None): + super().__init__() + self.gatherv2 = P.GatherV2().set_strategy(strategy1) + self.gatherv2.add_prim_attr("manual_split", ((1, 0), (7, 1))) + self.mul = P.Mul().set_strategy(strategy2) + self.reshape = P.Reshape() + self.matmul = P.MatMul().set_strategy(strategy3) + self.matmul.add_prim_attr("forward_reduce_scatter", True) + self.param = Parameter(initializer("ones", (8, 64), ms.float32), name="gatherv2_param") + self.mul_weight = Parameter(initializer("ones", (2, 4, 64), ms.float32), name="mul_weight") + self.matmul_weight = Parameter(initializer("ones", (256, 16), ms.float32), name="matmul_weight") + + def construct(self, x, b): + out = self.gatherv2(self.param, x, 0) + out = self.mul(out, self.mul_weight) + out = self.reshape(out, (2, 256)) + out = self.matmul(out, self.matmul_weight) + return out + +_x = Tensor(np.ones([2, 4]), dtype=ms.int32) +_b = Tensor(np.ones([64, 8]), dtype=ms.float32) + +def compile_net(net): + optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) + train_net = TrainOneStepCell(net, optimizer) + train_net.set_auto_parallel() + _executor.compile(train_net, _x, _b) + context.reset_auto_parallel_context() + +def test_neg_data_parallel(): + context.set_context(save_graphs=True) + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=2, global_rank=0) + strategy1 = ((2, 1), (1, 2)) + strategy2 = ((1, 2, 1), (1, 2, 1)) + strategy3 = ((1, 2), (2, 1)) + net = Net(strategy1, strategy2, strategy3) + compile_net(net) diff --git 
a/tests/ut/python/parallel/test_sparse_gather_v2.py b/tests/ut/python/parallel/test_sparse_gather_v2.py index dd0517a08ec..2d4d0c2bf28 100644 --- a/tests/ut/python/parallel/test_sparse_gather_v2.py +++ b/tests/ut/python/parallel/test_sparse_gather_v2.py @@ -13,6 +13,7 @@ # limitations under the License. # ============================================================================ import numpy as np +import pytest import mindspore as ms import mindspore.nn as nn @@ -184,6 +185,7 @@ def test_gatherv2_auto1(): _executor.compile(net, x, y) +@pytest.mark.skip(reason="The transition from GatherV2 to EmbeddingLookup needs adjusting. by lichen") def test_gatherv2_cpu0(): context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel") strategy1 = ((8, 1), (1, 1)) @@ -196,6 +198,7 @@ def test_gatherv2_cpu0(): _executor.compile(net, x, y) +@pytest.mark.skip(reason="The transition from GatherV2 to EmbeddingLookup needs adjusting. by lichen") def test_gatherv2_cpu1(): context.set_auto_parallel_context(device_num=16, global_rank=0, parallel_mode="semi_auto_parallel") strategy1 = ((16, 1), (1, 1)) @@ -208,6 +211,7 @@ def test_gatherv2_cpu1(): _executor.compile(net, x, y) +@pytest.mark.skip(reason="The transition from GatherV2 to EmbeddingLookup needs adjusting. 
by lichen") def test_gatherv2_cpu2(): context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel") strategy1 = ((1, 8), (1, 1)) diff --git a/tests/ut/python/parameter_feature/test_var_grad.py b/tests/ut/python/parameter_feature/test_var_grad.py index 7a332b1c3ba..f0358394e76 100644 --- a/tests/ut/python/parameter_feature/test_var_grad.py +++ b/tests/ut/python/parameter_feature/test_var_grad.py @@ -22,7 +22,7 @@ from mindspore.common.parameter import ParameterTuple from mindspore.nn import Cell from mindspore.ops import operations as P -context.set_context(mode=context.GRAPH_MODE) +context.set_context(mode=context.GRAPH_MODE, save_graphs=True) def test_net_vargs_expand(): @@ -184,6 +184,27 @@ def test_grad_var_args_with_sens(): _ = grad_net(x, y, sens) +def test_grad_with_param_sens(): + """"test grad_with_sens parameter""" + + class GradNet(Cell): + def __init__(self, net): + super(GradNet, self).__init__() + self.weights = ParameterTuple(net.trainable_params()) + self.net = net + self.sens = Parameter(Tensor(np.ones([3, 4, 5]), dtype=mstype.float32), name='sens', requires_grad=False) + self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True) + + def construct(self, x, y): + return self.grad(self.net, self.weights)(x, y, self.sens) + + x = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) + y = Tensor(np.ones([3, 4, 5]), dtype=mstype.float32) + net = SecondNet() + grad_net = GradNet(net) + _ = grad_net(x, y) + + def test_var_args_grad(): class VarNet(Cell): def __init__(self, net): diff --git a/tests/ut/python/pipeline/infer/test_hypermap_specialize.py b/tests/ut/python/pipeline/infer/test_hypermap_specialize.py index 1f669f73554..c292e3662db 100644 --- a/tests/ut/python/pipeline/infer/test_hypermap_specialize.py +++ b/tests/ut/python/pipeline/infer/test_hypermap_specialize.py @@ -53,4 +53,4 @@ def test_hypermap_specialize_param(): expected_ret = (Tensor(np.full(1, 5).astype(np.int32)), Tensor(np.full(2, 
5).astype(np.int32))) ret = hypermap_specialize_param() - assert ret == (expected_ret, expected_ret) + assert ret == (expected_ret, list(expected_ret)) diff --git a/tests/ut/python/pipeline/infer/test_net_infer.py b/tests/ut/python/pipeline/infer/test_net_infer.py index 6b32a7617d2..9c19f213f56 100644 --- a/tests/ut/python/pipeline/infer/test_net_infer.py +++ b/tests/ut/python/pipeline/infer/test_net_infer.py @@ -45,6 +45,7 @@ def test_net_infer(): def test_assign_in_while(): + context.set_context(device_target="Ascend") context.set_context(mode=context.GRAPH_MODE) class Net(nn.Cell): def __init__(self, input_shape): diff --git a/tests/ut/python/pynative_mode/test_cell_bprop.py b/tests/ut/python/pipeline/parse/test_cell_bprop.py similarity index 95% rename from tests/ut/python/pynative_mode/test_cell_bprop.py rename to tests/ut/python/pipeline/parse/test_cell_bprop.py index 09a096a0907..e896ddc9ac7 100644 --- a/tests/ut/python/pynative_mode/test_cell_bprop.py +++ b/tests/ut/python/pipeline/parse/test_cell_bprop.py @@ -16,6 +16,7 @@ import numpy as np import pytest +import mindspore as ms import mindspore.common.dtype as mstype import mindspore.nn as nn from mindspore import Parameter @@ -24,12 +25,14 @@ from mindspore.common.initializer import initializer from mindspore.common.tensor import Tensor from mindspore.ops import composite as C from mindspore.ops import operations as P -from ....mindspore_test_framework.utils.bprop_util import bprop +from .....mindspore_test_framework.utils.bprop_util import bprop def setup_module(module): - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") +def teardown_module(module): + context.set_context(device_target="Ascend") class MulAdd(nn.Cell): def __init__(self): @@ -45,7 +48,9 @@ class MulAdd(nn.Cell): def test_grad_mul_add(): mul_add = MulAdd() - assert C.grad_all(mul_add)(1, 2) == (2, 4) + x = Tensor(1, dtype=ms.int32) + y = Tensor(2, dtype=ms.int32) + assert 
C.grad_all(mul_add)(x, y) == (2, 4) class InlineMulADD(nn.Cell): @@ -60,7 +65,9 @@ class InlineMulADD(nn.Cell): def test_grad_inline_mul_add(): inline_mul_add = InlineMulADD() - assert C.grad_all(inline_mul_add)(1, 2) == (3, 6) + x = Tensor(1, dtype=ms.int32) + y = Tensor(2, dtype=ms.int32) + assert C.grad_all(inline_mul_add)(x, y) == (3, 6) class WithParameter(nn.Cell): @@ -93,7 +100,9 @@ class WithNoBprop(nn.Cell): def test_with_no_bprop(): with_no_bprop = WithNoBprop() - assert C.grad_all(with_no_bprop)(1, 2) == (2, 1) + x = Tensor(1, dtype=ms.int32) + y = Tensor(2, dtype=ms.int32) + assert C.grad_all(with_no_bprop)(x, y) == (2, 1) def test_grad_in_bprop_1(): diff --git a/tests/ut/python/pipeline/parse/test_enumerate.py b/tests/ut/python/pipeline/parse/test_enumerate.py index cd808696f1d..37f9c603dfd 100644 --- a/tests/ut/python/pipeline/parse/test_enumerate.py +++ b/tests/ut/python/pipeline/parse/test_enumerate.py @@ -91,6 +91,7 @@ def test_enumerate_tuple_parameter(): index_sum += i ret += (j,) return index_sum, ret + x = Tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))) net = Net() net(x, x, x) @@ -127,10 +128,12 @@ def test_enumerate_tuple_parameter_1(): index_sum += i[0] ret += (i[1],) return index_sum, ret + x = Tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))) net = Net() net(x, x, x) + def test_enumerate_tuple_const_2(): class Net(nn.Cell): def __init__(self): @@ -162,20 +165,37 @@ def test_enumerate_tuple_parameter_2(): index_sum += i[0] ret += (i[1],) return index_sum, ret + x = Tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))) net = Net() net(x, x, x) -def test_enumerate_parameter_type_error(): +def test_enumerate_first_input_type_error(): class Net(nn.Cell): def __init__(self): super(Net, self).__init__() def construct(self, x): return enumerate(x) + x = Tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))) net = Net() with pytest.raises(TypeError) as ex: net(x) - assert "For 'enumerate', the input parameter should be tuple or list" in str(ex.value) + assert 
"For 'enumerate', the 'first input'" in str(ex.value) + + +def test_enumerate_start_type_error(): + class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + + def construct(self, x): + return enumerate(x, start=1.2) + + x = Tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))) + net = Net() + with pytest.raises(TypeError) as ex: + net((x, x)) + assert "For 'enumerate', the 'start'" in str(ex.value) diff --git a/tests/ut/python/pipeline/parse/test_for_stmt.py b/tests/ut/python/pipeline/parse/test_for_stmt.py index 4930dae796d..748c73e8738 100644 --- a/tests/ut/python/pipeline/parse/test_for_stmt.py +++ b/tests/ut/python/pipeline/parse/test_for_stmt.py @@ -17,6 +17,9 @@ from dataclasses import dataclass import numpy as np from mindspore import Tensor, Model, context +from mindspore.ops import operations as P +from mindspore.ops import composite as C +from mindspore.ops import functional as F from mindspore.nn import Cell from mindspore.nn import ReLU from ...ut_filter import non_graph_engine @@ -66,3 +69,58 @@ def function_access_base(number): def test_access_0040(): """ test_access_0040 """ function_access_base(2) + + +class OpSeqNet(Cell): + def __init__(self, loop_count=1): + super().__init__() + self.loop_count = loop_count + self.op_seq = (P.Sqrt(), P.Reciprocal(), P.Square()) + + def construct(self, x): + t = x + for op in self.op_seq: + t = op(t) + return t + + +def test_op_seq_test(): + context.set_context(mode=context.GRAPH_MODE) + net = OpSeqNet() + input_np = np.random.randn(2, 3, 4, 5).astype(np.float32) + input_me = Tensor(input_np) + net(input_me) + + +_grad_fusion = C.MultitypeFuncGraph("grad_fushion") + + +@_grad_fusion.register("Tensor", "Function") +def tensor_grad_scale(x, op): + return op(x) + + +class AllReduceTest(Cell): + def __init__(self, loop_count=1): + super().__init__() + self.op_list = () + self.fushion_flag = [0, 1, 1, 0, 1, 0] + for i in self.fushion_flag: + op = P.AllReduce().add_prim_attr('fusion', i) + self.op_list = 
self.op_list + (op,) + self.hyper_map = C.HyperMap() + + def construct(self, x): + ret = () + for _ in self.fushion_flag: + ret = ret + (x,) + fushion_res = self.hyper_map(F.partial(_grad_fusion), ret, self.op_list) + return fushion_res + + +def test_allreduce_fushio_test(): + context.set_context(mode=context.GRAPH_MODE) + net = AllReduceTest() + input_np = np.random.randn(2, 3, 4, 5).astype(np.float32) + input_me = Tensor(input_np) + net(input_me) diff --git a/tests/ut/python/pipeline/parse/test_parse.py b/tests/ut/python/pipeline/parse/test_parse.py index bbc32d0728d..b295adcbeca 100644 --- a/tests/ut/python/pipeline/parse/test_parse.py +++ b/tests/ut/python/pipeline/parse/test_parse.py @@ -19,21 +19,27 @@ @Desc : """ import logging +import pytest import numpy as np import mindspore as ms import mindspore.nn as nn from mindspore import Tensor +from mindspore import context +from mindspore.ops import composite as C from mindspore.common.api import ms_function, _executor +from mindspore.ops._grad.grad_base import bprop_getters +from mindspore.ops.primitive import prim_attr_register, PrimitiveWithInfer from mindspore.ops.functional import tensor_add from ...ut_filter import non_graph_engine -# pylint: disable=W0613 +# pylint: disable=W0613,W0612 # W0613: unused-argument log = logging.getLogger("test") log.setLevel(level=logging.ERROR) +context.set_context(mode=context.GRAPH_MODE) # Test case: use the parse obj interface use default parameter @@ -135,3 +141,113 @@ def test_net_with_ndarray(): input_data = np.array([[1.2, 2.1], [2.2, 3.2]]).astype('float32') net(ms.Tensor(input_data)) + + +def test_bprop_with_wrong_output_num(): + context.set_context(check_bprop=True) + class BpropWithWrongOutputNum(PrimitiveWithInfer): + @prim_attr_register + def __init__(self): + super(BpropWithWrongOutputNum, self).__init__('BpropWithWrongOutputNum') + + def __call__(self, x, y): + return x + + def infer_shape(self, x_shape, yshape): + return x_shape + + def infer_dtype(self, 
x_type, y_type): + return x_type + + @bprop_getters.register(BpropWithWrongOutputNum) + def get_bprop_with_wrong_output_num(self): + """Generate bprop for BpropWithWrongOutputNum""" + + def bprop(x, y, out, dout): + return (dout,) + + return bprop + + class BpropWithWrongOutputNumCell(nn.Cell): + def __init__(self): + super(BpropWithWrongOutputNumCell, self).__init__() + + def construct(self, x, y): + return BpropWithWrongOutputNum()(x, y) + + with pytest.raises(TypeError): + C.grad_all(BpropWithWrongOutputNumCell())(1, 2) + +def test_bprop_with_wrong_output_type(): + context.set_context(check_bprop=True) + class BpropWithWrongOutputType(PrimitiveWithInfer): + @prim_attr_register + def __init__(self): + super(BpropWithWrongOutputType, self).__init__('BpropWithWrongOutputType') + + def __call__(self, x): + return x + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_type): + return x_type + + @bprop_getters.register(BpropWithWrongOutputType) + def get_bprop_with_wrong_output_type(self): + """Generate bprop for BpropWithWrongOutputType""" + + def bprop(x, out, dout): + return (1,) + + return bprop + + class BpropWithWrongOutputTypeCell(nn.Cell): + def __init__(self): + super(BpropWithWrongOutputTypeCell, self).__init__() + + def construct(self, x): + return BpropWithWrongOutputType()(x) + + with pytest.raises(TypeError): + C.grad_all(BpropWithWrongOutputTypeCell())(Tensor(np.ones([64, 10]).astype(np.int32))) + + +def test_bprop_with_wrong_output_shape(): + context.set_context(check_bprop=True) + class BpropWithWrongOutputShape(PrimitiveWithInfer): + @prim_attr_register + def __init__(self): + super(BpropWithWrongOutputShape, self).__init__('BpropWithWrongOutputShape') + + def __call__(self, x): + return x + + def infer_shape(self, x_shape): + return x_shape + + def infer_dtype(self, x_type): + return x_type + + @bprop_getters.register(BpropWithWrongOutputShape) + def get_bprop_with_wrong_output_shape(self): + """Generate bprop for 
BpropWithWrongOutputShape""" + ones = Tensor(np.ones([2,]).astype(np.int32)) + + def bprop(x, out, dout): + return (ones,) + + return bprop + + class BpropWithWrongOutputShapeCell(nn.Cell): + def __init__(self): + super(BpropWithWrongOutputShapeCell, self).__init__() + + def construct(self, x): + return BpropWithWrongOutputShape()(x) + + with pytest.raises(TypeError): + net = BpropWithWrongOutputShapeCell() + net.set_grad() + C.grad_all(net)(Tensor(np.ones([64, 10]).astype(np.int32))) diff --git a/tests/ut/python/pynative_mode/nn/test_tensor_operation.py b/tests/ut/python/pynative_mode/nn/test_tensor_operation.py index 306ba63c9f1..eb8610bdf1f 100644 --- a/tests/ut/python/pynative_mode/nn/test_tensor_operation.py +++ b/tests/ut/python/pynative_mode/nn/test_tensor_operation.py @@ -78,3 +78,9 @@ def test_tensor_imul(): y = Tensor(np.ones([3, 3, 3, 3]).astype(np.float32)) x *= y assert x.asnumpy()[0][0][0][0] == 1.0 + + +def test_tensor_pow(): + x = Tensor(np.ones([3, 3, 3, 3]).astype(np.float32) * 2) + y = x ** 3 + assert y.asnumpy()[0][0][0][0] == 8.0 diff --git a/tests/ut/python/pynative_mode/ops/test_grad.py b/tests/ut/python/pynative_mode/ops/test_grad.py index 8d880a86d9b..f028e91bebd 100644 --- a/tests/ut/python/pynative_mode/ops/test_grad.py +++ b/tests/ut/python/pynative_mode/ops/test_grad.py @@ -89,7 +89,11 @@ def test_scalar_cast_grad(): output = F.scalar_cast(x, input_t) return output - gfn = C.grad(fx_cast)(input_x) + @ms_function + def grad_fx_cast(input_x): + return C.grad(fx_cast)(input_x) + + gfn = grad_fx_cast(input_x) expect_dx = 1 assert gfn == expect_dx @@ -133,25 +137,6 @@ def test_transpose_grad(): assert np.all(gout[0].asnumpy() == expect) -@non_graph_engine -def test_squeeze_grad(): - """ test_squeeze_grad """ - input_tensor = Tensor(np.ones(shape=[3, 2, 1])) - squeeze = P.Squeeze(2) - - def fn(x): - output = squeeze(x) - return output - - out = fn(input_tensor) - gfn = grad_all_with_sens(fn) - sens = Tensor(np.ones_like(out.asnumpy())) - args 
= [input_tensor, sens] - gout = gfn(*args) - expect = np.ones([3, 2, 1]) - assert np.all(gout[0].asnumpy() == expect) - - def test_select_grad(): """ test_select_grad """ select = P.Select() @@ -176,6 +161,25 @@ def test_select_grad(): assert np.all(gout[2].asnumpy() == expect_y) +@non_graph_engine +def test_squeeze_grad(): + """ test_squeeze_grad """ + input_tensor = Tensor(np.ones(shape=[3, 2, 1])) + squeeze = P.Squeeze(2) + + def fn(x): + output = squeeze(x) + return output + + out = fn(input_tensor) + gfn = grad_all_with_sens(fn) + sens = Tensor(np.ones_like(out.asnumpy())) + args = [input_tensor, sens] + gout = gfn(*args) + expect = np.ones([3, 2, 1]) + assert np.all(gout[0].asnumpy() == expect) + + def test_SubGrad(): """ test_SubGrad """ input_x = Tensor(np.array([[2, 2]])) diff --git a/tests/ut/python/pynative_mode/test_context.py b/tests/ut/python/pynative_mode/test_context.py index 66dc0a4f587..e2d4e314129 100644 --- a/tests/ut/python/pynative_mode/test_context.py +++ b/tests/ut/python/pynative_mode/test_context.py @@ -118,6 +118,12 @@ def test_variable_memory_max_size(): context.set_context(variable_memory_max_size="3GB") +def test_print_file_path(): + """test_print_file_path""" + with pytest.raises(IOError): + context.set_context(print_file_path="./") + + def test_set_context(): """ test_set_context """ context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", diff --git a/tests/ut/python/pynative_mode/test_framstruct.py b/tests/ut/python/pynative_mode/test_framstruct.py index 39a4c97ab9a..3b99d0dc5fa 100644 --- a/tests/ut/python/pynative_mode/test_framstruct.py +++ b/tests/ut/python/pynative_mode/test_framstruct.py @@ -16,6 +16,7 @@ import numpy as np import pytest +import mindspore as ms import mindspore.nn as nn from mindspore import context from mindspore.common import dtype as mstype @@ -23,8 +24,6 @@ from mindspore.common.parameter import Parameter, ParameterTuple from mindspore.common.tensor import Tensor from mindspore.ops import 
composite as C from mindspore.ops import operations as P -from mindspore.ops._grad.grad_base import bprop_getters -from mindspore.ops.primitive import prim_attr_register, PrimitiveWithInfer from ..ut_filter import non_graph_engine from ....mindspore_test_framework.utils.check_gradient import ( ms_function, check_jacobian, Tensor, NNGradChecker, @@ -156,14 +155,14 @@ def test_if_always_true(): @non_graph_engine def test_f(): """ test_f """ - res = mainf(3, 2) + res = mainf(Tensor(3, dtype=ms.int32), Tensor(2, dtype=ms.int32)) assert res == (2, 3) @non_graph_engine def test_grad_add_mul(): """ test_grad_add_mul """ - res = grad_add_mul(3, 2) + res = grad_add_mul(Tensor(3, dtype=ms.int32), Tensor(2, dtype=ms.int32)) assert res == (2, 7) @@ -262,17 +261,19 @@ def test_if_tensor(): assert res == Tensor(np.ones([1]).astype(np.int32) * 4) -@ms_function def rec(x): """ rec """ if x > 0: return rec(x - 1) return x +@ms_function +def grad_rec(input_x): + return C.grad(rec)(input_x) def test_grad_rec(): """ test_grad_rec """ - res = C.grad(rec)(10) + res = grad_rec(3) assert res == 1 @@ -282,7 +283,6 @@ def test_me_rec(): assert res == 0 -@ms_function def t2_while(x, y): out = y - x i = 0 @@ -298,8 +298,10 @@ def test_while2(): def test_grad_while2(): - res = C.grad(t2_while)(2, 3) - assert res == 3 + @ms_function + def df_t2_while(input_x, input_y): + return C.grad(t2_while)(input_x, input_y) + assert df_t2_while(2, 3) == 3 def if_test(a, b): @@ -316,7 +318,7 @@ def grad_if(x, y): def test_grad_if(): """ test_grad_if """ - assert grad_if(5, 4) == (3, 0) + assert grad_if(Tensor(5, dtype=ms.int32), Tensor(4, dtype=ms.int32)) == (3, 0) # While loop is not unrolled in forward and backward graphs. 
@@ -421,7 +423,7 @@ def grad_while(x): def test_grad_while(): """ test_grad_while """ - assert grad_while(5) == (60,) + assert grad_while(Tensor(5, dtype=ms.int32)) == (60,) @ms_function @@ -438,8 +440,10 @@ def test_factorial(): def test_grad_factorial(): - res = C.grad(factorial)(3) - assert res == 11 + @ms_function + def df_factorial(x): + return C.grad(factorial)(x) + assert df_factorial(3) == 11 @ms_function @@ -513,7 +517,7 @@ def _for(x): ret = ret * i return ret - +@ms_function def grad_for(x): """ grad_for """ return C.grad_all(_for)(x) @@ -786,7 +790,10 @@ def multi_outputs(x, y): def test_grad_multi_outputs(): - assert C.grad_all_with_sens(multi_outputs)(2, 3, (1, 1)) == (4, 4) + @ms_function + def df_multi_outputs(x, y): + return C.grad_all_with_sens(multi_outputs)(x, y, (1, 1)) + assert df_multi_outputs(2, 3) == (4, 4) @ms_function @@ -813,7 +820,7 @@ def grad_refactor_simple_1(x, y): def test_grad_refactor_simple_1(): - assert C.grad_all(grad_refactor_simple_1)(2, 1) == (4, 2) + assert C.grad_all(grad_refactor_simple_1)(Tensor(2, dtype=ms.int32), Tensor(1, dtype=ms.int32)) == (4, 2) def grad_refactor_simple_2(x, y, z): @@ -822,7 +829,10 @@ def grad_refactor_simple_2(x, y, z): def test_grad_refactor_simple_2(): - assert C.grad_all(grad_refactor_simple_2)(2, 3, 0) == (7, 4, 7) + x = Tensor(2, dtype=ms.int32) + y = Tensor(3, dtype=ms.int32) + z = Tensor(0, dtype=ms.int32) + assert C.grad_all(grad_refactor_simple_2)(x, y, z) == (7, 4, 7) def grad_refactor_1(a, b): @@ -835,7 +845,7 @@ def grad_refactor_1(a, b): def test_grad_refactor_1(): - assert C.grad_all(grad_refactor_1)(2, 3) == (3, 2) + assert C.grad_all(grad_refactor_1)(Tensor(2, dtype=ms.int32), Tensor(3, dtype=ms.int32)) == (3, 2) def grad_refactor_2(a, b): @@ -848,7 +858,7 @@ def grad_refactor_2(a, b): def test_grad_refactor_2(): - assert C.grad_all(grad_refactor_2)(2, 3) == (27, 54) + assert C.grad_all(grad_refactor_2)(Tensor(2, dtype=ms.int32), Tensor(3, dtype=ms.int32)) == (27, 54) def 
grad_refactor_3(a): @@ -859,7 +869,10 @@ def grad_refactor_3(a): def test_grad_refactor_3(): - assert C.grad_all(grad_refactor_3)(3) == (3,) + @ms_function + def df_refactor_3(x): + return C.grad_all(grad_refactor_3)(x) + assert df_refactor_3(3) == (3,) def grad_refactor_4(a): @@ -870,7 +883,7 @@ def grad_refactor_4(a): def test_grad_refactor_4(): - assert C.grad_all(grad_refactor_4)(4) == (3,) + assert C.grad_all(grad_refactor_4)(Tensor(4, dtype=ms.int32)) == (3,) def grad_refactor_5(a): @@ -881,7 +894,10 @@ def grad_refactor_5(a): def test_grad_refactor_5(): - assert C.grad_all(grad_refactor_5)(1) == (1,) + @ms_function + def df_refactor_5(x): + return C.grad_all(grad_refactor_5)(x) + assert df_refactor_5(1) == (1,) def grad_refactor_6(a, b): @@ -892,7 +908,7 @@ def grad_refactor_6(a, b): def test_grad_refactor_6(): - assert C.grad_all(grad_refactor_6)(3, 2) == (3, 1) + assert C.grad_all(grad_refactor_6)(Tensor(3, dtype=ms.int32), Tensor(2, dtype=ms.int32)) == (3, 1) def grad_refactor_while(x): @@ -904,7 +920,10 @@ def grad_refactor_while(x): def test_grad_refactor_9(): - assert C.grad_all(grad_refactor_while)(3) == (6,) + @ms_function + def df_refactor_while(input_x): + return C.grad_all(grad_refactor_while)(input_x) + assert df_refactor_while(3) == (6,) def grad_refactor__while_1(x): @@ -919,7 +938,7 @@ def grad_refactor__while_1(x): def test_grad_refactor_10(): """ test_grad_while """ - assert C.grad_all(grad_refactor__while_1)(5) == (60,) + assert C.grad_all(grad_refactor__while_1)(Tensor(5, dtype=ms.int32)) == (60,) def test_grad_refactor_11(): @@ -985,7 +1004,10 @@ def grad_refactor_14(a, b): def test_grad_refactor_14(): - assert C.grad_all(grad_refactor_14)(2, 3) == (3, 9) + @ms_function + def df_refactor_14(x, y): + return C.grad_all(grad_refactor_14)(x, y) + assert df_refactor_14(2, 3) == (3, 9) # pylint: disable=using-constant-test @@ -1011,109 +1033,11 @@ def test_grad_if_defer_inline(): assert grads == (Tensor(np.full([128, 96], 0.6, 
dtype=np.float32)),) -def test_bprop_with_wrong_output_num(): - context.set_context(check_bprop=True) - class BpropWithWrongOutputNum(PrimitiveWithInfer): - @prim_attr_register +def test_dict_const(): + class Net(nn.Cell): def __init__(self): - super(BpropWithWrongOutputNum, self).__init__('BpropWithWrongOutputNum') - - def __call__(self, x, y): - return x - - def infer_shape(self, x_shape, yshape): - return x_shape - - def infer_dtype(self, x_type, y_type): - return x_type - - @bprop_getters.register(BpropWithWrongOutputNum) - def get_bprop_with_wrong_output_num(self): - """Generate bprop for BpropWithWrongOutputNum""" - - def bprop(x, y, out, dout): - return (dout,) - - return bprop - - class BpropWithWrongOutputNumCell(nn.Cell): - def __init__(self): - super(BpropWithWrongOutputNumCell, self).__init__() - - def construct(self, x, y): - return BpropWithWrongOutputNum()(x, y) - - with pytest.raises(TypeError): - C.grad_all(BpropWithWrongOutputNumCell())(1, 2) - -def test_bprop_with_wrong_output_type(): - context.set_context(check_bprop=True) - class BpropWithWrongOutputType(PrimitiveWithInfer): - @prim_attr_register - def __init__(self): - super(BpropWithWrongOutputType, self).__init__('BpropWithWrongOutputType') - - def __call__(self, x): - return x - - def infer_shape(self, x_shape): - return x_shape - - def infer_dtype(self, x_type): - return x_type - - @bprop_getters.register(BpropWithWrongOutputType) - def get_bprop_with_wrong_output_type(self): - """Generate bprop for BpropWithWrongOutputType""" - - def bprop(x, out, dout): - return (1,) - - return bprop - - class BpropWithWrongOutputTypeCell(nn.Cell): - def __init__(self): - super(BpropWithWrongOutputTypeCell, self).__init__() - - def construct(self, x): - return BpropWithWrongOutputType()(x) - - with pytest.raises(TypeError): - C.grad_all(BpropWithWrongOutputTypeCell())(Tensor(np.ones([64, 10]).astype(np.int32))) - - -def test_bprop_with_wrong_output_shape(): - context.set_context(check_bprop=True) - class 
BpropWithWrongOutputShape(PrimitiveWithInfer): - @prim_attr_register - def __init__(self): - super(BpropWithWrongOutputShape, self).__init__('BpropWithWrongOutputShape') - - def __call__(self, x): - return x - - def infer_shape(self, x_shape): - return x_shape - - def infer_dtype(self, x_type): - return x_type - - @bprop_getters.register(BpropWithWrongOutputShape) - def get_bprop_with_wrong_output_shape(self): - """Generate bprop for BpropWithWrongOutputShape""" - ones = Tensor(np.ones([2,]).astype(np.int32)) - - def bprop(x, out, dout): - return (ones,) - - return bprop - - class BpropWithWrongOutputShapeCell(nn.Cell): - def __init__(self): - super(BpropWithWrongOutputShapeCell, self).__init__() - - def construct(self, x): - return BpropWithWrongOutputShape()(x) - - with pytest.raises(TypeError): - C.grad_all(BpropWithWrongOutputShapeCell())(Tensor(np.ones([64, 10]).astype(np.int32))) + super(Net, self).__init__() + self.res = {'1': 10} + def construct(self): + return self.res + Net()() diff --git a/tests/ut/python/pynative_mode/test_hook.py b/tests/ut/python/pynative_mode/test_hook.py index 07a7a7ad8b8..f34a81ab5c0 100644 --- a/tests/ut/python/pynative_mode/test_hook.py +++ b/tests/ut/python/pynative_mode/test_hook.py @@ -13,6 +13,7 @@ # limitations under the License. 
# ============================================================================ import numpy as np +import pytest import mindspore.nn as nn import mindspore.ops.operations as P @@ -154,22 +155,47 @@ def test_hook(): print(loss_output.asnumpy().shape) +bprop_debug = False + class MulAdd(nn.Cell): def __init__(self): super(MulAdd, self).__init__() def construct(self, x, y): - return 2 * x + y + return 2 * x * x + y * y def bprop(self, x, y, out, dout): - assert (x == 1) - assert (y == 2) - assert (out == 4) - assert (dout == 1) - return 3 * dout, 2 * y + global bprop_debug + bprop_debug = True + return dout, 2 * y def test_custom_bprop(): mul_add = MulAdd() mul_add.bprop_debug = True - assert C.grad_all(mul_add)(1, 2) == (3, 4) + x = Tensor(np.array([1, 2, 3]).astype(np.int32)) + y = Tensor(np.array([2, 3, 4]).astype(np.int32)) + C.grad_all(mul_add)(x, y) + assert bprop_debug + + +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + + def construct(self, x, y): + return 2 * x * x + y * y + +def test_grad_all(): + net = Net() + x = Tensor(np.array([1, 2, 3]).astype(np.int32)) + y = Tensor(np.array([2, 3, 4]).astype(np.int32)) + res = C.grad_all(net)(x, y) + print(res) + +def test_check_input(): + net = Net() + x = np.array([1, 2, 3]) + y = np.array([2, 3, 4]) + with pytest.raises(TypeError): + net(x, y) diff --git a/tests/ut/python/pynative_mode/test_implicit_conversion.py b/tests/ut/python/pynative_mode/test_implicit_conversion.py new file mode 100644 index 00000000000..ecaffd87f26 --- /dev/null +++ b/tests/ut/python/pynative_mode/test_implicit_conversion.py @@ -0,0 +1,204 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" test implicit conversion """ +import numpy as np + +from mindspore import Tensor, nn +from mindspore.ops import composite as C + + +def test_float_tensor_and_int_add(): + x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + y = 2 + ret_actual = x + y + ret_expect = Tensor(np.array([[2.1, 2.2, 2.3], [2.4, 2.5, 2.6]], dtype=np.float32)) + assert ret_actual.dtype == ret_expect.dtype + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_bool_tensor_and_float_add(): + x = Tensor(np.array([[True, False], [False, True]], dtype=np.bool_)) + y = 3.3 + ret_actual = x + y + ret_expect = Tensor(np.array([[4.3, 3.3], [3.3, 4.3]], dtype=np.float32)) + assert ret_actual.dtype == ret_expect.dtype + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_bool_tensor_and_int_add(): + x = Tensor(np.array([[True, False], [False, True]], dtype=np.bool_)) + y = 3 + ret_actual = x + y + ret_expect = Tensor(np.array([[4, 3], [3, 4]], dtype=np.int32)) + assert ret_actual.dtype == ret_expect.dtype + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_bool_and_int_tensor_add(): + x = True + y = Tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)) + ret_actual = x + y + ret_expect = Tensor(np.array([[2, 3, 4], [5, 6, 7]], dtype=np.int32)) + assert ret_actual.dtype == ret_expect.dtype + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_float_tensor_and_int_tensor_add(): + x = 
Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + y = Tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)) + ret_actual = x + y + ret_expect = Tensor(np.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]], dtype=np.float32)) + assert ret_actual.dtype == ret_expect.dtype + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_float_tensor_and_float_tensor_add(): + x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + y = Tensor(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float16)) + ret_actual = x + y + ret_expect = Tensor(np.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]], dtype=np.float32)) + assert ret_actual.dtype == ret_expect.dtype + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_int_tensor_and_int_tensor_add(): + x = Tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int8)) + y = Tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)) + ret_actual = x + y + ret_expect = Tensor(np.array([[2, 4, 6], [8, 10, 12]], dtype=np.int32)) + assert ret_actual.dtype == ret_expect.dtype + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_float_tensor_and_bool_tensors_add(): + x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + y = Tensor(np.array([[True, True, True], [False, False, False]], dtype=np.bool_)) + ret_actual = x + y + ret_expect = Tensor(np.array([[1.1, 1.2, 1.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + assert (ret_actual.asnumpy() == ret_expect.asnumpy()).all() + + +def test_float_tensor_and_bool_tensors_add_grad(): + class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + + def construct(self, x, y): + return x + y + + class GradNet(nn.Cell): + def __init__(self, net): + super(GradNet, self).__init__() + self.net = net + + def construct(self, x, y, sens): + + return C.grad_all_with_sens(self.net)(x, y, sens) + + x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + y = 
Tensor(np.array([[True, True, True], [False, False, False]], dtype=np.bool_)) + sens = Tensor(np.array([[1.0, 2.0, 0.0], [0.0, 3.0, 4.0]], dtype=np.float32)) + net = Net() + grad_net = GradNet(net) + ret = grad_net(x, y, sens) + assert ret[0].dtype == x.dtype + assert ret[1].dtype == y.dtype + assert (ret[0].asnumpy() == sens.asnumpy()).all() + assert (ret[1].asnumpy() == sens.asnumpy().astype(np.bool_)).all() + + +def test_float_tensor_and_int_tensors_sub_grad(): + class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + + def construct(self, x, y): + return x - y + + class GradNet(nn.Cell): + def __init__(self, net): + super(GradNet, self).__init__() + self.net = net + + def construct(self, x, y, sens): + + return C.grad_all_with_sens(self.net)(x, y, sens) + + x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + y = Tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)) + sens = Tensor(np.array([[1.0, 2.0, 0.0], [0.0, 3.0, 4.0]], dtype=np.float32)) + net = Net() + grad_net = GradNet(net) + ret = grad_net(x, y, sens) + print(ret) + assert ret[0].dtype == x.dtype + assert ret[1].dtype == y.dtype + assert (ret[0].asnumpy() == sens.asnumpy()).all() + assert (ret[1].asnumpy() == sens.asnumpy() * -1).all() + + +def test_float16_tensor_and_float32_tensors_sub_grad(): + class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + + def construct(self, x, y): + return x - y + + class GradNet(nn.Cell): + def __init__(self, net): + super(GradNet, self).__init__() + self.net = net + + def construct(self, x, y, sens): + + return C.grad_all_with_sens(self.net)(x, y, sens) + + x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.int32)) + y = Tensor(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32)) + sens = Tensor(np.array([[1.0, 2.0, 0.0], [0.0, 3.0, 4.0]], dtype=np.float32)) + net = Net() + grad_net = GradNet(net) + ret = grad_net(x, y, sens) + print(ret) + assert ret[0].dtype == x.dtype + 
assert ret[1].dtype == y.dtype + assert (ret[0].asnumpy() == sens.asnumpy()).all() + assert (ret[1].asnumpy() == sens.asnumpy() * -1).all() + + +def test_float_tensor_and_int_add_grad(): + class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + + def construct(self, x): + return x + 2 + + class GradNet(nn.Cell): + def __init__(self, net): + super(GradNet, self).__init__() + self.net = net + + def construct(self, x, sens): + return C.grad_all_with_sens(self.net)(x, sens) + + x = Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) + sens = Tensor(np.array([[1.0, 2.0, 0.0], [0.0, 3.0, 4.0]], dtype=np.float32)) + net = Net() + grad_net = GradNet(net) + ret = grad_net(x, sens) + assert ret[0].dtype == x.dtype + assert (ret[0].asnumpy() == sens.asnumpy()).all() diff --git a/tests/ut/python/pynative_mode/test_insert_grad_of.py b/tests/ut/python/pynative_mode/test_insert_grad_of.py index 0a28bbbb634..218a4ee2533 100644 --- a/tests/ut/python/pynative_mode/test_insert_grad_of.py +++ b/tests/ut/python/pynative_mode/test_insert_grad_of.py @@ -46,6 +46,7 @@ def test_InsertGradientOf_1(): c = x * y return c + @ms_function def f(x, y): return C.grad_all(stop_test)(x, y) @@ -80,6 +81,7 @@ def test_InsertGradientOf_2(): def f(x, y): return clip_test(x, y) + @ms_function def fd(x, y): return C.grad_all(clip_test)(x, y) diff --git a/tests/ut/python/pynative_mode/test_stop_gradient.py b/tests/ut/python/pynative_mode/test_stop_gradient.py index a94f80adf09..09e4f25c54c 100644 --- a/tests/ut/python/pynative_mode/test_stop_gradient.py +++ b/tests/ut/python/pynative_mode/test_stop_gradient.py @@ -16,6 +16,7 @@ import numpy as np import pytest +import mindspore as ms import mindspore.common.dtype as mstype import mindspore.nn as nn from mindspore import Parameter, ParameterTuple @@ -81,16 +82,24 @@ def stop_test4(x, y): return e +@ms_function def grad_stop_test(x, y): """ grad_stop_test """ return C.grad_all(stop_test2)(x, y) +@ms_function def 
grad_stop_test1(x, y): """ grad_stop_test1 """ return C.grad_all(stop_test3)(x, y) +@ms_function +def grad_stop_test5(x, y): + """ grad_stop_test5 """ + return C.grad_all(stop_test5)(x, y) + + def test_stop(): """ test_stop """ print("test_stop:", grad_stop_test(1, 1)) @@ -103,7 +112,7 @@ def test_stop1(): def test_stop5(): """ test_stop1 """ - print("test_stop5:", C.grad_all(stop_test5)(2, 3)) + print("test_stop5:", grad_stop_test5(2, 3)) class GradWrap(nn.Cell): @@ -247,7 +256,7 @@ def test_stop_gradient_4(): def stop_test(x): return stop_gradient(x) - assert C.grad_all(stop_test)(1) == (0,) + assert C.grad_all(stop_test)(Tensor(1, dtype=ms.int32)) == (0,) def test_stop_gradient_5(): @@ -257,7 +266,7 @@ def test_stop_gradient_5(): ret = x + y return ret - assert C.grad_all(stop_test)(1) == (1,) + assert C.grad_all(stop_test)(Tensor(1, dtype=ms.int32)) == (1,) def test_stop_gradient_6(): @@ -266,7 +275,7 @@ def test_stop_gradient_6(): ret = stop_gradient(ret) return ret - assert C.grad_all(stop_test)(1, 3) == (0, 0) + assert C.grad_all(stop_test)(Tensor(1, dtype=ms.int32), Tensor(3, dtype=ms.int32)) == (0, 0) class PrimWithMultiOutputs(PrimitiveWithInfer): diff --git a/tests/ut/python/train/quant/mobilenetv2.py b/tests/ut/python/train/quant/mobilenetv2.py deleted file mode 100644 index 163b230e1e0..00000000000 --- a/tests/ut/python/train/quant/mobilenetv2.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""MobileNetV2""" -from mindspore import nn -from mindspore.ops import operations as P - - -def make_divisible(input_x, div_by=8): - return int((input_x + div_by) // div_by) - - -def _conv_bn(in_channel, - out_channel, - ksize, - stride=1): - """Get a conv2d batchnorm and relu layer.""" - return nn.SequentialCell( - [nn.Conv2d(in_channel, - out_channel, - kernel_size=ksize, - stride=stride), - nn.BatchNorm2d(out_channel)]) - - -class InvertedResidual(nn.Cell): - def __init__(self, inp, oup, stride, expend_ratio): - super(InvertedResidual, self).__init__() - self.stride = stride - assert stride in [1, 2] - - hidden_dim = int(inp * expend_ratio) - self.use_res_connect = self.stride == 1 and inp == oup - if expend_ratio == 1: - self.conv = nn.SequentialCell([ - nn.Conv2d(hidden_dim, hidden_dim, 3, stride, group=hidden_dim), - nn.BatchNorm2d(hidden_dim), - nn.ReLU6(), - nn.Conv2d(hidden_dim, oup, 1, 1), - nn.BatchNorm2d(oup) - ]) - else: - self.conv = nn.SequentialCell([ - nn.Conv2d(inp, hidden_dim, 1, 1), - nn.BatchNorm2d(hidden_dim), - nn.ReLU6(), - - nn.Conv2d(hidden_dim, hidden_dim, 3, stride, group=hidden_dim), - nn.BatchNorm2d(hidden_dim), - nn.ReLU6(), - - nn.Conv2d(hidden_dim, oup, 1, 1), - nn.BatchNorm2d(oup) - ]) - - def construct(self, input_x): - out = self.conv(input_x) - if self.use_res_connect: - out = input_x + out - return out - - -class MobileNetV2(nn.Cell): - def __init__(self, num_class=1000, input_size=224, width_mul=1.): - super(MobileNetV2, self).__init__() - _ = input_size - block = InvertedResidual - input_channel = 32 - last_channel = 1280 - inverted_residual_setting = [ - [1, 16, 1, 1], - [6, 24, 2, 2], - [6, 32, 3, 2], - [6, 64, 4, 2], - [6, 96, 3, 1], - [6, 160, 3, 2], - [6, 230, 1, 1], - ] - if width_mul > 1.0: - last_channel = make_divisible(last_channel * 
width_mul) - self.last_channel = last_channel - features = [_conv_bn(3, input_channel, 3, 2)] - - for t, c, n, s in inverted_residual_setting: - out_channel = make_divisible(c * width_mul) if t > 1 else c - for i in range(n): - if i == 0: - features.append(block(input_channel, out_channel, s, t)) - else: - features.append(block(input_channel, out_channel, 1, t)) - input_channel = out_channel - - features.append(_conv_bn(input_channel, self.last_channel, 1)) - - self.features = nn.SequentialCell(features) - self.mean = P.ReduceMean(keep_dims=False) - self.classifier = nn.Dense(self.last_channel, num_class) - - def construct(self, input_x): - out = input_x - out = self.features(out) - out = self.mean(out, (2, 3)) - out = self.classifier(out) - return out diff --git a/tests/ut/python/train/quant/mobilenetv2_combined.py b/tests/ut/python/train/quant/mobilenetv2_combined.py deleted file mode 100644 index 51916192d84..00000000000 --- a/tests/ut/python/train/quant/mobilenetv2_combined.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""mobile net v2""" -from mindspore import nn -from mindspore.ops import operations as P - - -def make_divisible(input_x, div_by=8): - return int((input_x + div_by) // div_by) - - -def _conv_bn(in_channel, - out_channel, - ksize, - stride=1): - """Get a conv2d batchnorm and relu layer.""" - return nn.SequentialCell( - [nn.Conv2dBnAct(in_channel, - out_channel, - kernel_size=ksize, - stride=stride, - has_bn=True)]) - - -class InvertedResidual(nn.Cell): - def __init__(self, inp, oup, stride, expend_ratio): - super(InvertedResidual, self).__init__() - self.stride = stride - assert stride in [1, 2] - - hidden_dim = int(inp * expend_ratio) - self.use_res_connect = self.stride == 1 and inp == oup - if expend_ratio == 1: - self.conv = nn.SequentialCell([ - nn.Conv2dBnAct(hidden_dim, - hidden_dim, - 3, - stride, - group=hidden_dim, - has_bn=True, - activation='relu6'), - nn.Conv2dBnAct(hidden_dim, oup, 1, 1, - has_bn=True) - ]) - else: - self.conv = nn.SequentialCell([ - nn.Conv2dBnAct(inp, hidden_dim, 1, 1, - has_bn=True, - activation='relu6'), - nn.Conv2dBnAct(hidden_dim, - hidden_dim, - 3, - stride, - group=hidden_dim, - has_bn=True, - activation='relu6'), - nn.Conv2dBnAct(hidden_dim, oup, 1, 1, - has_bn=True) - ]) - self.add = P.TensorAdd() - - def construct(self, input_x): - out = self.conv(input_x) - if self.use_res_connect: - out = self.add(input_x, out) - return out - - -class MobileNetV2(nn.Cell): - def __init__(self, num_class=1000, input_size=224, width_mul=1.): - super(MobileNetV2, self).__init__() - _ = input_size - block = InvertedResidual - input_channel = 32 - last_channel = 1280 - inverted_residual_setting = [ - [1, 16, 1, 1], - [6, 24, 2, 2], - [6, 32, 3, 2], - [6, 64, 4, 2], - [6, 96, 3, 1], - [6, 160, 3, 2], - [6, 230, 1, 1], - ] - if width_mul > 1.0: - last_channel = make_divisible(last_channel * width_mul) - self.last_channel = last_channel - features = [_conv_bn(3, 
input_channel, 3, 2)] - - for t, c, n, s in inverted_residual_setting: - out_channel = make_divisible(c * width_mul) if t > 1 else c - for i in range(n): - if i == 0: - features.append(block(input_channel, out_channel, s, t)) - else: - features.append(block(input_channel, out_channel, 1, t)) - input_channel = out_channel - - features.append(_conv_bn(input_channel, self.last_channel, 1)) - - self.features = nn.SequentialCell(features) - self.mean = P.ReduceMean(keep_dims=False) - self.classifier = nn.DenseBnAct(self.last_channel, num_class) - - def construct(self, input_x): - out = input_x - out = self.features(out) - out = self.mean(out, (2, 3)) - out = self.classifier(out) - return out diff --git a/tests/ut/python/train/quant/test_quant.py b/tests/ut/python/train/quant/test_quant.py index 1a21bc2c023..39e887170ca 100644 --- a/tests/ut/python/train/quant/test_quant.py +++ b/tests/ut/python/train/quant/test_quant.py @@ -20,7 +20,7 @@ import mindspore.context as context from mindspore import Tensor from mindspore import nn from mindspore.train.quant import quant as qat -from mobilenetv2_combined import MobileNetV2 +from model_zoo.mobilenetv2_quant.src.mobilenetV2 import mobilenetV2 context.set_context(mode=context.GRAPH_MODE, device_target="GPU") @@ -42,7 +42,7 @@ class LeNet5(nn.Cell): def __init__(self, num_class=10): super(LeNet5, self).__init__() self.num_class = num_class - self.conv1 = nn.Conv2dBnAct(1, 6, kernel_size=5, has_bn=True, activation='relu6', pad_mode="valid") + self.conv1 = nn.Conv2dBnAct(1, 6, kernel_size=5, has_bn=True, activation='relu', pad_mode="valid") self.conv2 = nn.Conv2dBnAct(6, 16, kernel_size=5, activation='relu', pad_mode="valid") self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu') self.fc2 = nn.DenseBnAct(120, 84, activation='relu') @@ -67,20 +67,19 @@ def test_qat_lenet(): img = Tensor(np.ones((32, 1, 32, 32)).astype(np.float32)) net = LeNet5() net = qat.convert_quant_network( - net, freeze_bn=10000, num_bits=8) + net, 
bn_fold=True, per_channel=[True, False], symmetric=[True, False]) # should load the checkpoint. mock here for param in net.get_parameters(): param.init_data() - qat.export_geir(net, img, file_name="quant.pb") + qat.export(net, img, file_name="quant.pb") @pytest.mark.skip(reason="no `te.lang.cce` in ut env") def test_qat_mobile(): - net = MobileNetV2() + network = mobilenetV2(num_classes=1000) img = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32)) - net = qat.convert_quant_network( - net, quant_delay=0, bn_fold=True, freeze_bn=10000, num_bits=8) + network = qat.convert_quant_network(network, bn_fold=True, per_channel=[True, False], symmetric=[True, False]) # should load the checkpoint. mock here - for param in net.get_parameters(): + for param in network.get_parameters(): param.init_data() - qat.export_geir(net, img, file_name="quant.pb") + qat.export(network, img, file_name="quant.pb") diff --git a/tests/ut/python/train/test_amp.py b/tests/ut/python/train/test_amp.py index c7befb6c2be..6bb4ec54642 100644 --- a/tests/ut/python/train/test_amp.py +++ b/tests/ut/python/train/test_amp.py @@ -22,10 +22,10 @@ from mindspore import amp from mindspore import nn from mindspore.train import Model, ParallelMode from mindspore.common import dtype as mstype -from mindspore.model_zoo.resnet import resnet50 from ....dataset_mock import MindData from mindspore.parallel._auto_parallel_context import auto_parallel_context from mindspore.communication.management import init +from tests.ut.python.model.resnet import resnet50 def setup_module(module): _ = module diff --git a/tests/ut/python/utils/test_serialize.py b/tests/ut/python/utils/test_serialize.py index 035ea878459..7f85695a194 100644 --- a/tests/ut/python/utils/test_serialize.py +++ b/tests/ut/python/utils/test_serialize.py @@ -34,7 +34,7 @@ from mindspore.train.serialization import save_checkpoint, load_checkpoint, load _exec_save_checkpoint, export, _save_graph from ..ut_filter import non_graph_engine 
-context.set_context(mode=context.GRAPH_MODE, print_file_path="print.pb") +context.set_context(mode=context.GRAPH_MODE, print_file_path="print/print.pb") class Net(nn.Cell): @@ -374,10 +374,13 @@ def test_print(): def teardown_module(): - files = ['parameters.ckpt', 'new_ckpt.ckpt', 'empty.ckpt', 'print.pb'] + files = ['parameters.ckpt', 'new_ckpt.ckpt', 'empty.ckpt'] for item in files: file_name = './' + item if not os.path.exists(file_name): continue os.chmod(file_name, stat.S_IWRITE) os.remove(file_name) + import shutil + if os.path.exists('./print'): + shutil.rmtree('./print') diff --git a/tests/vm_impl/vm_me.py b/tests/vm_impl/vm_me.py index 89cc1569a96..7216ec613bd 100644 --- a/tests/vm_impl/vm_me.py +++ b/tests/vm_impl/vm_me.py @@ -441,7 +441,7 @@ def max_pool_grad(x, dout, pool_h, pool_w, stride): """Grad of max pooling.""" dout = dout.transpose(0, 2, 3, 1) pool_size = pool_h * pool_w - dmax = np.zeros((dout.size, pool_size)) + dmax = np.zeros((dout.size, pool_size), dout.dtype) col = im2col(x, pool_h, pool_w, stride) col = col.reshape(-1, pool_h * pool_w) arg_max = np.argmax(col, axis=1) @@ -456,7 +456,7 @@ def max_pool_grad_with_argmax(x, dout, arg_max, pool_h, pool_w, stride): """Grad of max pooling with argmax.""" dout = dout.transpose(0, 2, 3, 1) pool_size = pool_h * pool_w - dmax = np.zeros((dout.size, pool_size)) + dmax = np.zeros((dout.size, pool_size), dout.dtype) dmax[np.arange(arg_max.size), arg_max.flatten()] = dout.flatten() dmax = dmax.reshape(dout.shape + (pool_size,)) dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1) diff --git a/third_party/icu4c/filter.json b/third_party/icu4c/filter.json deleted file mode 100644 index b3decad8fb4..00000000000 --- a/third_party/icu4c/filter.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "strategy": "additive", - "featureFilters": { - "normalization": "include" - } -} \ No newline at end of file diff --git a/third_party/patch/pslite/ps_lite.patch001 b/third_party/patch/pslite/ps_lite.patch001 
index bdc7b11a4b2..e2e51e93c86 100644 --- a/third_party/patch/pslite/ps_lite.patch001 +++ b/third_party/patch/pslite/ps_lite.patch001 @@ -12,16 +12,7 @@ diff -Npur ps-lite-master/include/dmlc/base.h ps-lite-master-new/include/dmlc/ba /*! diff -Npur ps-lite-master/include/dmlc/logging.h ps-lite-master-new/include/dmlc/logging.h --- ps-lite-master/include/dmlc/logging.h 2020-02-29 13:59:55.000000000 +0800 -+++ ps-lite-master-new/include/dmlc/logging.h 2020-07-01 11:58:00.015919207 +0800 -@@ -13,7 +13,7 @@ - #include - #include - #include --#include "./base.h" -+//#include "./base.h" - - #if DMLC_LOG_STACK_TRACE - #include ++++ ps-lite-master-new/include/dmlc/logging.h 2020-07-08 21:35:33.334584767 +0800 @@ -52,7 +52,7 @@ struct Error : public std::runtime_error namespace dmlc {