syn code 0706

2020-07-06 15:28:10 +08:00 · 2020-07-06 15:28:10 +08:00 · 17da929b82
parent 6086a7252e d0dd892884
commit 17da929b82
1120 changed files with 38701 additions and 55861 deletions
--- a/.gitignore
+++ b/.gitignore
@ -26,6 +26,7 @@ cmake-build-debug
 *_pb2.py
 *.pb.h
 *.pb.cc
+*.pb

 # Object files
 *.o
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -12,7 +12,7 @@ if (NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
 endif ()

 if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
-    set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Werror -Wno-return-std-move -Wno-unused-private-field -Wno-unused-lambda-capture -Wno-sign-compare -Wno-overloaded-virtual -Wno-unneeded-internal-declaration -Wno-unused-variable -Wno-pessimizing-move -Wno-inconsistent-missing-override -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")    
+    set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Werror -Wno-return-std-move -Wno-unused-private-field -Wno-unused-lambda-capture -Wno-sign-compare -Wno-overloaded-virtual -Wno-unneeded-internal-declaration -Wno-unused-variable -Wno-pessimizing-move -Wno-inconsistent-missing-override -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
 else()
    set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
 endif()
@ -38,6 +38,10 @@ if (NOT Patch_FOUND)
 endif ()
 message(PATCH_EXECUTABLE = ${Patch_EXECUTABLE})

+if (ENABLE_AKG AND ENABLE_D)
+    add_subdirectory("${CMAKE_SOURCE_DIR}/akg")
+endif()
+
 include(${CMAKE_SOURCE_DIR}/cmake/mind_expression.cmake)
 include_directories(${CMAKE_CURRENT_SOURCE_DIR})
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/flatbuffers/include)
@ -86,10 +90,6 @@ if (ENABLE_GE OR ENABLE_D OR ENABLE_TESTCASES)
    include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/third_party/fwkacllib/inc/toolchain)
 endif()

-if (ENABLE_AKG AND ENABLE_D)
-    add_subdirectory("${CMAKE_SOURCE_DIR}/akg")
-endif()
-
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
 add_subdirectory(mindspore/ccsrc)
 if (ENABLE_TESTCASES)
--- a/README.md
+++ b/README.md
@ -29,7 +29,7 @@ enrichment of the AI software/hardware application ecosystem.

 <img src="docs/MindSpore-architecture.png" alt="MindSpore Architecture" width="600"/>

-For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/0.3.0-alpha/architecture.html).
+For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/master/architecture.html).

 ### Automatic Differentiation

@ -66,7 +66,6 @@ MindSpore offers build options across multiple backends:
 | Ascend910 | Ubuntu-x86 | ✔️ |
 |  | EulerOS-x86 | ✔️ |
 |  | EulerOS-aarch64 | ✔️ |
-| GPU CUDA 9.2 | Ubuntu-x86 | ✔️ |
 | GPU CUDA 10.1 | Ubuntu-x86 | ✔️ |
 | CPU | Ubuntu-x86 | ✔️ |
 |  | Windows-x86 | ✔️ |
@ -76,7 +75,7 @@ For installation using `pip`, take `CPU` and `Ubuntu-x86` build version as an ex
 1. Download whl from [MindSpore download page](https://www.mindspore.cn/versions/en), and install the package.

    ```
-    pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.3.0-alpha/MindSpore/cpu/ubuntu_x86/mindspore-0.3.0-cp37-cp37m-linux_x86_64.whl
+    pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.5.0-beta/MindSpore/cpu/ubuntu_x86/mindspore-0.5.0-cp37-cp37m-linux_x86_64.whl
    ```

 2. Run the following command to verify the install.
@ -133,8 +132,8 @@ currently the containerized build options are supported as follows:

    For `CPU` backend, you can directly pull and run the latest stable image using the below command:
    ```
-    docker pull mindspore/mindspore-cpu:0.3.0-alpha
-    docker run -it mindspore/mindspore-cpu:0.3.0-alpha /bin/bash
+    docker pull mindspore/mindspore-cpu:0.5.0-beta
+    docker run -it mindspore/mindspore-cpu:0.5.0-beta /bin/bash
    ```

 * GPU
@ -151,8 +150,8 @@ currently the containerized build options are supported as follows:

    Then you can pull and run the latest stable image using the below command:
    ```
-    docker pull mindspore/mindspore-gpu:0.3.0-alpha
-    docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.3.0-alpha /bin/bash
+    docker pull mindspore/mindspore-gpu:0.5.0-beta
+    docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.5.0-beta /bin/bash
    ```

    To test if the docker image works, please execute the python code below and check the output:
@ -187,7 +186,7 @@ please check out [docker](docker/README.md) repo for the details.

 ## Quickstart

-See the [Quick Start](https://www.mindspore.cn/tutorial/en/0.3.0-alpha/quick_start/quick_start.html)
+See the [Quick Start](https://www.mindspore.cn/tutorial/en/master/quick_start/quick_start.html)
 to implement the image classification.

 ## Docs
--- a/RELEASE.md
+++ b/RELEASE.md
@ -1,3 +1,75 @@
+# Release 0.5.0-beta
+
+## Major Features and Improvements
+
+### Ascend 910 Training and Inference Framework
+* New models
+    * ResNext50: a simple, highly modularized network architecture using aggregated residual transformations for image classification on ImageNet 2012 dataset.
+    * MASS: a pre-training method for sequence to sequence based language generation tasks on Text Summarization and Conversational Response Generation using News Crawls 2007-2017 dataset, Gigaword corpus and Cornell movie dialog corpus.
+    * Transformer: a neural network architecture for language understanding on WMT 2014 English-German dataset.
+    * GCN: Graph Convolutional Networks for the task of classification of nodes in a graph on Cora and Citeseer datasets.
+    * GAT: an attention-based graph neural network for node classification on Cora and CiteSeer dataset.
+* Frontend and user interface
+    * Support tensor value and assignment of mixed tensor index in graph mode.
+    * Support tensor comparison, len operator, constexpr syntax, value and assignment of tensor index in pynative mode.
+    * Support converting MindSpore IR to pb format for infer model.
+    * Support print operator to write data directly on the hard disk.
+    * Add the double recursive programming solution for very high speed parallel strategy search in automatic parallel.
+    * User interfaces change log
+      * Allow the learning rate of AdamWeightDecayDynamicLR and Lamb to be 0([!1826](https://gitee.com/mindspore/mindspore/pulls/1826))
+      * Restrict the input parameters of the entire network to Tensors([!1967](https://gitee.com/mindspore/mindspore/pulls/1967))
+      * Turn shape and dtype into attributes instead of interfaces([!1919](https://gitee.com/mindspore/mindspore/pulls/1919))
+      * Delete multitypefungraph([!2116](https://gitee.com/mindspore/mindspore/pulls/2116))
+      * Refactor the callback module in an encapsulated way, use _CallbackManager instead of _build_callbacks([!2236](https://gitee.com/mindspore/mindspore/pulls/2236))
+      * Delete EmbeddingLookup([!2163](https://gitee.com/mindspore/mindspore/pulls/2163))
+      * Checkpoint add model_type([!2517](https://gitee.com/mindspore/mindspore/pulls/2517))
+* Executor and performance optimization
+    * Heterogeneous execution on CPU and Ascend devices is supported and has been verified with the Wide&Deep model.
+    * Quantitative training of MobileNetV2, Lenet and Resnet50 on Ascend-910 is supported.
+    * Support new fusion architecture, which can do fusion optimization across graphs and kernels to improve execution speed.
+* Data processing, augmentation, and save format
+    * Support data processing pipeline performance profiling.
+    * Support public dataset loading, such as CLUE and Coco.
+    * Support more text processing, such as more tokenizers and vocab data.
+    * Support MindRecord padded data.
+### Other Hardware Support
+* GPU platform
+    * New model supported: Bert / Wide&Deep.
+    * Support setting max device memory.
+* CPU platform
+    * New model supported: LSTM.
+
+## Bugfixes
+* Models
+    * Bert, Move Bert from `example` to `model_zoo`, optimize network for better performance. ([!1902](https://gitee.com/mindspore/mindspore/pulls/1902))
+    * VGG16, Move VGG16 from `example` to `model_zoo`, optimize network for better accuracy. ([!2645](https://gitee.com/mindspore/mindspore/pulls/2645))
+    * Alexnet, modify parameter setting to improve accuracy ([!1364](https://gitee.com/mindspore/mindspore/pulls/2370))
+    * Wide&Deep, Move Wide&Deep from `example` to `model_zoo`, optimize network for better performance. ([!2221](https://gitee.com/mindspore/mindspore/pulls/2221))
+* Python API
+    * Fix bug in auto cast([!1766](https://gitee.com/mindspore/mindspore/pulls/1766))
+    * Fix bug of register_backward_hook([!2148](https://gitee.com/mindspore/mindspore/pulls/2148))
+    * Fix bug of tuple args in pynative mode([!1878](https://gitee.com/mindspore/mindspore/pulls/1878))
+    * Fix bug of checking numbers of arguments and graph parameters([!1701](https://gitee.com/mindspore/mindspore/pulls/1701))
+* Executor
+    * Fix bug of loading input data repeatedly in pynative mode([!1966](https://gitee.com/mindspore/mindspore/pulls/1966))
+    * Fix bug of list cannot be used as input in pynative mode([!1765](https://gitee.com/mindspore/mindspore/pulls/1765))
+    * Fix bug of kernel select ([!2103](https://gitee.com/mindspore/mindspore/pulls/2103))
+    * Fix bug of pattern matching for batchnorm fusion in the case of auto mix precision.([!1851](https://gitee.com/mindspore/mindspore/pulls/1851))
+    * Fix bug of generating hccl's kernel info.([!2393](https://gitee.com/mindspore/mindspore/pulls/2393))
+* GPU platform
+    * Fix bug of summary feature invalid([!2173](https://gitee.com/mindspore/mindspore/pulls/2173))
+* Data processing
+    * Fix bug of Cifar dataset reading([!2096](https://gitee.com/mindspore/mindspore/pulls/2096))
+    * Fix bug of C++ behavior in RandomCropAndResize([!2026](https://gitee.com/mindspore/mindspore/pulls/2026))
+    * Fix the bug of mindrecord shuffle([!2420](https://gitee.com/mindspore/mindspore/pulls/2420))
+
+## Contributors
+Thanks goes to these wonderful people:
+
+Alexey Shevlyakov, avakh, baihuawei, BowenK, buxue, caifubi, caojian05, Cathy Wong, changzherui, chenfei, chengxianbin, chenhaozhe, chenjianping, chentingting, chenzomi, chujinjin, Danish Farid, dayschan, dengwentao, dinghao, etone-chan, fangzehua, fary86, geekun, Giancarlo Colmenares, gong chen, gukecai, guohongzilong, hangangqiang, heleiwang, hesham, He Wei, hexia, hongxing, huangdongrun, huanghui, islam_amin, Jamie Nisbet, Jesse Lee, jiangjinsheng, jiangzhiwen, jinyaohui, jjfeing, jojobugfree, Jonathan Yan, jonyguo, Junhan Hu, Kang, kingfo, kouzhenzhong, kpy, kswang, laiyongqiang, leopz, liangzelang, lichenever, lihongkang, Li Hongzhang, lilei, limingqi107, lirongzhen1, liubuyu, liuchongming74, liuwenhao4, liuxiao, Lixia Chen, liyanliu, liyong, lizhenyu, lvliang, Mahdi, Margaret_wangrui, meixiaowei, ms_yan, nhussain, ougongchang, panfengfeng, panyifeng, peilinwang, Peilin Wang, pkuliuliu, qianlong, rick_sanchez, shibeiji, Shida He, shijianning, simson, sunsuodong, suteng, Tinazhang, Tron Zhang, unknown, VectorSL, wandongdong, wangcong, wangdongxu, wangdongxu6, wanghua, wangnan39, Wei Luning, wenchunjiang, wenkai, wilfChen, WilliamLian, wukesong, Xian Weizhao, Xiaoda Zhang, xiefangqi, xulei2020, xunxue, xutianchun, Yang, yanghaitao, yanghaitao1, yanghaoran, yangjie, yangjie159, YangLuo, Yanjun Peng, yankai, yanzhenxiang2020, yao_yf, Yi Huaijie, yoonlee666, yuchaojie, yujianfeng, zhangzhongpeng, zhangdengcheng, Zhang Qinghua, zhangyinxia, zhangz0911gm, zhaojichen, zhaoting, zhaozhenlong, zhoufeng, zhouneng, zhousiyi, Zirui Wu, Ziyan, zjun, ZPaC, lihongzhang, wangdongxu
+
+Contributions of any kind are welcome!
+
 # Release 0.3.0-alpha

 ## Major Features and Improvements
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit c460176523d039c8995f1d71089753725ebc0792
+Subproject commit df57a6cf9450e347d1854687d1fe66a420ee3b35
--- a/build.sh
+++ b/build.sh
@ -25,7 +25,7 @@ usage()
  echo "Usage:"
  echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
  echo "              [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
-  echo "              [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K]"
+  echo "              [-P on|off] [-z [on|off]] [-M on|off] [-V 10.1] [-I] [-K] [-B on|off] [-E]"  # 9.2 is rejected by checkopts, so it is no longer advertised
  echo ""
  echo "Options:"
  echo "    -d Debug mode"
@ -50,10 +50,12 @@ usage()
  echo "    -D Enable dumping of function graph ir, default on"
  echo "    -z Compile dataset & mindrecord, default on"
  echo "    -M Enable MPI and NCCL for GPU training, gpu default on"
-  echo "    -V Specify the minimum required cuda version, default CUDA 9.2"
+  echo "    -V Specify the minimum required cuda version, default CUDA 10.1"
  echo "    -I Compile predict, default off"
-  echo "    -K Compile with AKG, default off"
+  echo "    -K Compile with AKG, default on"
  echo "    -s Enable serving module, default off"
+  echo "    -B Enable debugger, default off"
+  echo "    -E Enable IBVERBS for parameter server, default off"
 }

 # check value of input is 'on' or 'off'
@ -88,14 +90,17 @@ checkopts()
  ENABLE_DUMP_IR="on"
  COMPILE_MINDDATA="on"
  ENABLE_MPI="off"
-  CUDA_VERSION="9.2"
+  CUDA_VERSION="10.1"
  COMPILE_PREDICT="off"
  USE_GLOG="on"
  PREDICT_PLATFORM=""
  ENABLE_AKG="on"
  ENABLE_SERVING="off"
+  ENABLE_DEBUGGER="off"
+  ENABLE_IBVERBS="off"
+
  # Process the options
-  while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K:s' opt
+  while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K:sB:E' opt
  do
    OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
    case "${opt}" in
@ -191,6 +196,10 @@ checkopts()
          usage
          exit 1
        fi
+        if [[ "X$OPTARG" == "X9.2" ]]; then
+          echo "Unsupported CUDA version 9.2"
+          exit 1
+        fi
        CUDA_VERSION="$OPTARG"
        ;;
      P)
@ -240,6 +249,15 @@ checkopts()
        ENABLE_SERVING="on"
        echo "enable serving"
        ;;
+      B)
+        check_on_off $OPTARG B
+        ENABLE_DEBUGGER="$OPTARG"  # honor the validated on/off value so that "-B off" really disables the debugger
+        echo "enable debugger"
+        ;;
+      E)
+        ENABLE_IBVERBS="on"
+        echo "enable IBVERBS for parameter server"
+        ;;
      *)
        echo "Unknown option ${opt}!"
        usage
@ -322,7 +340,13 @@ build_mindspore()
    if [[ "X$ENABLE_SERVING" = "Xon" ]]; then
        CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_SERVING=ON"
    fi
+    if [[ "X$ENABLE_DEBUGGER" = "Xon" ]]; then
+        CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DEBUGGER=ON"
+    fi

+    if [[ "X$ENABLE_IBVERBS" = "Xon" ]]; then
+        CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_IBVERBS=ON"
+    fi
    echo "${CMAKE_ARGS}"
    if [[ "X$INC_BUILD" = "Xoff" ]]; then
      cmake ${CMAKE_ARGS} ../..
@ -446,9 +470,9 @@ build_predict()

    cd "${BASEPATH}/predict/output/"
    if [[ "$PREDICT_PLATFORM" == "x86_64" ]]; then
-      tar -cf MSPredict-0.3.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed
+      tar -czf MSPredict-0.5.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed  # -z: gzip the archive so its content matches the .tar.gz name
    elif [[ "$PREDICT_PLATFORM" == "arm64" ]]; then
-      tar -cf MSPredict-0.3.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed
+      tar -czf MSPredict-0.5.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed  # -z: gzip the archive so its content matches the .tar.gz name
    fi
    echo "success to build predict project!"
 }
--- a/cmake/external_libs/absl.cmake
+++ b/cmake/external_libs/absl.cmake
@ -0,0 +1,14 @@
+mindspore_add_pkg(absl
+        VER 20200225.2
+        LIBS absl_strings absl_throw_delegate absl_raw_logging_internal absl_int128 absl_bad_optional_access
+        URL https://github.com/abseil/abseil-cpp/archive/20200225.2.tar.gz
+        MD5 73f2b6e72f1599a9139170c29482ddc4
+        CMAKE_OPTION -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=TRUE)
+
+include_directories(${absl_INC})
+
+add_library(mindspore::absl_strings ALIAS absl::absl_strings)
+add_library(mindspore::absl_throw_delegate ALIAS absl::absl_throw_delegate)
+add_library(mindspore::absl_raw_logging_internal ALIAS absl::absl_raw_logging_internal)
+add_library(mindspore::absl_int128 ALIAS absl::absl_int128)
+add_library(mindspore::absl_bad_optional_access ALIAS absl::absl_bad_optional_access)
--- a/cmake/external_libs/c-ares.cmake
+++ b/cmake/external_libs/c-ares.cmake
@ -0,0 +1,12 @@
+mindspore_add_pkg(c-ares
+        VER 1.15.0
+        LIBS cares 
+        URL https://github.com/c-ares/c-ares/releases/download/cares-1_15_0/c-ares-1.15.0.tar.gz
+        MD5 d2391da274653f7643270623e822dff7
+        CMAKE_OPTION -DCMAKE_BUILD_TYPE:STRING=Release
+        -DCARES_SHARED:BOOL=OFF
+        -DCARES_STATIC:BOOL=ON
+        -DCARES_STATIC_PIC:BOOL=ON)
+
+include_directories(${c-ares_INC})
+add_library(mindspore::cares ALIAS c-ares::cares)
--- a/cmake/external_libs/grpc.cmake
+++ b/cmake/external_libs/grpc.cmake
@ -0,0 +1,110 @@
+set(grpc_USE_STATIC_LIBS ON)
+if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+    set(grpc_CXXFLAGS "-fstack-protector-all -Wno-uninitialized -Wno-unused-parameter -fPIC -fvisibility=hidden -D_FORTIFY_SOURCE=2 -O2")
+elseif (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
+    set(grpc_CXXFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fvisibility=hidden -D_FORTIFY_SOURCE=2 -O2")
+else()
+    set(grpc_CXXFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fvisibility=hidden -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2")
+endif()
+
+set(grpc_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
+
+
+if (EXISTS ${protobuf_ROOT}/lib64)
+  set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${protobuf_ROOT}/lib64/cmake/protobuf")
+else()
+  set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${protobuf_ROOT}/lib/cmake/protobuf")
+endif()
+message("grpc using Protobuf_DIR : " ${_FINDPACKAGE_PROTOBUF_CONFIG_DIR})
+
+if (EXISTS ${absl_ROOT}/lib64)
+  set(_FINDPACKAGE_ABSL_CONFIG_DIR "${absl_ROOT}/lib64/cmake/absl")
+else()
+  set(_FINDPACKAGE_ABSL_CONFIG_DIR "${absl_ROOT}/lib/cmake/absl")
+endif()
+message("grpc using absl_DIR : " ${_FINDPACKAGE_ABSL_CONFIG_DIR})
+
+set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "")
+if (OPENSSL_ROOT_DIR)
+  set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "-DOPENSSL_ROOT_DIR:PATH=${OPENSSL_ROOT_DIR}")
+endif()
+
+mindspore_add_pkg(grpc
+        VER 1.27.3
+        LIBS grpc++ grpc gpr upb address_sorting
+        EXE grpc_cpp_plugin
+        URL https://github.com/grpc/grpc/archive/v1.27.3.tar.gz
+        MD5 0c6c3fc8682d4262dd0e5e6fabe1a7e2
+        CMAKE_OPTION -DCMAKE_BUILD_TYPE:STRING=Release
+        -DgRPC_INSTALL:BOOL=ON
+        -DgRPC_BUILD_TESTS:BOOL=OFF
+        -DgRPC_PROTOBUF_PROVIDER:STRING=package
+        -DgRPC_PROTOBUF_PACKAGE_TYPE:STRING=CONFIG
+        -DProtobuf_DIR:PATH=${_FINDPACKAGE_PROTOBUF_CONFIG_DIR}
+        -DgRPC_ZLIB_PROVIDER:STRING=package
+        -DZLIB_ROOT:PATH=${zlib_ROOT}
+        -DgRPC_ABSL_PROVIDER:STRING=package
+        -Dabsl_DIR:PATH=${_FINDPACKAGE_ABSL_CONFIG_DIR}
+        -DgRPC_CARES_PROVIDER:STRING=package
+        -Dc-ares_DIR:PATH=${c-ares_ROOT}/lib/cmake/c-ares
+        -DgRPC_SSL_PROVIDER:STRING=package
+        ${_CMAKE_ARGS_OPENSSL_ROOT_DIR}
+        )
+
+include_directories(${grpc_INC})
+
+add_library(mindspore::grpc++ ALIAS grpc::grpc++)
+
+# link other grpc libs
+target_link_libraries(grpc::grpc++ INTERFACE grpc::grpc grpc::gpr grpc::upb grpc::address_sorting)
+
+# link built dependencies
+target_link_libraries(grpc::grpc++ INTERFACE mindspore::z)
+target_link_libraries(grpc::grpc++ INTERFACE mindspore::cares)
+target_link_libraries(grpc::grpc++ INTERFACE mindspore::absl_strings mindspore::absl_throw_delegate
+                      mindspore::absl_raw_logging_internal mindspore::absl_int128 mindspore::absl_bad_optional_access)
+
+# link system openssl
+find_package(OpenSSL REQUIRED)
+target_link_libraries(grpc::grpc++ INTERFACE OpenSSL::SSL OpenSSL::Crypto)
+# ms_grpc_generate(<c_var> <h_var> <proto>...): for every given .proto file, register a custom command that runs protoc
+# with the gRPC C++ plugin, and return the generated source paths in <c_var> and header paths in <h_var> (caller's scope).
+function(ms_grpc_generate c_var h_var)
+    if(NOT ARGN)
+        message(SEND_ERROR "Error: ms_grpc_generate() called without any proto files")
+        return()
+    endif()
+    # Reset both result variables in this function's scope before appending to them.
+    set(${c_var})
+    set(${h_var})
+    # One generation rule per proto file passed after the two output variable names.
+    foreach(file ${ARGN})
+        get_filename_component(abs_file ${file} ABSOLUTE)
+        get_filename_component(file_name ${file} NAME_WE)
+        get_filename_component(file_dir ${abs_file} PATH)
+        file(RELATIVE_PATH rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${file_dir})
+        # NOTE(review): rel_path is not referenced below; outputs go flat into ${CMAKE_BINARY_DIR}/proto under the proto's base name.
+        list(APPEND ${c_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.cc")
+        list(APPEND ${h_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.h")
+        list(APPEND ${c_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.grpc.pb.cc")
+        list(APPEND ${h_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.grpc.pb.h")
+        # A single protoc run writes the --cpp_out and --grpc_out files; it depends on the proto, protoc and the plugin.
+        add_custom_command(
+                OUTPUT "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.cc"
+                "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.h"
+                "${CMAKE_BINARY_DIR}/proto/${file_name}.grpc.pb.cc"
+                "${CMAKE_BINARY_DIR}/proto/${file_name}.grpc.pb.h"
+                WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
+                COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/proto"
+                COMMAND protobuf::protoc --version
+                COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/proto
+                --grpc_out=${CMAKE_BINARY_DIR}/proto --plugin=protoc-gen-grpc=$<TARGET_FILE:grpc::grpc_cpp_plugin> ${abs_file}
+                DEPENDS protobuf::protoc grpc::grpc_cpp_plugin ${abs_file}
+                COMMENT "Running C++ gRPC compiler on ${file}" VERBATIM)
+    endforeach()
+    # Flag the outputs as GENERATED, then export both lists to the caller via PARENT_SCOPE.
+    set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE)
+    set(${c_var} ${${c_var}} PARENT_SCOPE)
+    set(${h_var} ${${h_var}} PARENT_SCOPE)
+
+endfunction()
--- a/cmake/external_libs/pslite.cmake
+++ b/cmake/external_libs/pslite.cmake
@ -0,0 +1,14 @@
+set(pslite_USE_STATIC_LIBS ON)
+if (ENABLE_IBVERBS)  # truthiness test, like the ENABLE_DEBUGGER checks: no error when unset, accepts ON/on/1/TRUE
+    set(pslite_CXXFLAGS "USE_IBVERBS=1")  # appended to the make command line by mindspore_add_pkg's ONLY_MAKE branch
+endif()
+mindspore_add_pkg(pslite
+        LIBS ps
+        URL https://github.com/dmlc/ps-lite/archive/34fd45cae457d59850fdcb2066467778d0673f21.zip
+        MD5 393c0e27b68bfaf96718caa3aa96f5a3
+        PATCHES ${CMAKE_SOURCE_DIR}/third_party/patch/pslite/ps_lite.patch001
+        ONLY_MAKE True
+        ONLY_MAKE_INCS include/*
+        ONLY_MAKE_LIBS build/*)
+include_directories(${pslite_INC})
+add_library(mindspore::pslite ALIAS pslite::ps)
--- a/cmake/external_libs/zeromq.cmake
+++ b/cmake/external_libs/zeromq.cmake
@ -0,0 +1,5 @@
+mindspore_add_pkg(zeromq
+        VER 4.1.4
+        HEAD_ONLY ./
+        URL  https://raw.githubusercontent.com/mli/deps/master/build/zeromq-4.1.4.tar.gz
+        MD5  a611ecc93fffeb6d058c0e6edf4ad4fb)
--- a/cmake/external_libs/zlib.cmake
+++ b/cmake/external_libs/zlib.cmake
@ -0,0 +1,9 @@
+mindspore_add_pkg(zlib
+        VER 1.2.11
+        LIBS z
+        URL https://github.com/madler/zlib/archive/v1.2.11.tar.gz
+        MD5 0095d2d2d1f3442ce1318336637b695f
+        CMAKE_OPTION -DCMAKE_BUILD_TYPE:STRING=Release)
+
+include_directories(${zlib_INC})
+add_library(mindspore::z ALIAS zlib::z)
--- a/cmake/mind_expression.cmake
+++ b/cmake/mind_expression.cmake
@ -14,12 +14,26 @@ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/eigen.cmake)
 include(${CMAKE_SOURCE_DIR}/cmake/external_libs/json.cmake)
 include(${CMAKE_SOURCE_DIR}/cmake/dependency_securec.cmake)
 include(${CMAKE_SOURCE_DIR}/cmake/external_libs/protobuf.cmake)
+
+if (ENABLE_DEBUGGER)
+    # build dependencies of gRPC
+    include(${CMAKE_SOURCE_DIR}/cmake/external_libs/absl.cmake)
+    include(${CMAKE_SOURCE_DIR}/cmake/external_libs/c-ares.cmake)
+    include(${CMAKE_SOURCE_DIR}/cmake/external_libs/zlib.cmake)
+    # build gRPC
+    include(${CMAKE_SOURCE_DIR}/cmake/external_libs/grpc.cmake)
+endif()
+
 include(${CMAKE_SOURCE_DIR}/cmake/external_libs/pybind11.cmake)
 MESSAGE("go to link flatbuffers")
 include(${CMAKE_SOURCE_DIR}/cmake/external_libs/flatbuffers.cmake)
 if(USE_GLOG)
    include(${CMAKE_SOURCE_DIR}/cmake/external_libs/glog.cmake)
 endif()
+if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows")
+    include(${CMAKE_SOURCE_DIR}/cmake/external_libs/zeromq.cmake)
+    include(${CMAKE_SOURCE_DIR}/cmake/external_libs/pslite.cmake)
+endif()

 find_package(Python3)
 include_directories(${Python3_INCLUDE_DIRS})
--- a/cmake/options.cmake
+++ b/cmake/options.cmake
@ -17,6 +17,8 @@ option(ENABLE_DUMP_E2E "Enable dump e2e file, default on" OFF)
 option(ENABLE_DUMP_IR "Enable dump funciton graph ir, default on" ON)
 option(ENABLE_MPI "enable mpi" OFF)
 option(ENABLE_AKG "enable akg" OFF)
+option(ENABLE_DEBUGGER "enable debugger" OFF)
+option(ENABLE_IBVERBS "enable IBVERBS for parameter server" OFF)

 if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    if (WIN32)
@ -112,3 +114,7 @@ endif()
 if(ENABLE_DUMP_E2E)
    add_compile_definitions(ENABLE_DUMP_E2E)
 endif()
+
+if(ENABLE_DEBUGGER)
+    add_compile_definitions(ENABLE_DEBUGGER)
+endif()
--- a/cmake/package.cmake
+++ b/cmake/package.cmake
@ -128,6 +128,11 @@ if (ENABLE_MPI)
        DESTINATION ${INSTALL_BASE_DIR}
        COMPONENT mindspore
    )
+    install(
+        TARGETS mpi_adapter
+        DESTINATION ${INSTALL_LIB_DIR}
+        COMPONENT mindspore
+    )
 endif ()

 if (ENABLE_GPU)
--- a/cmake/utils.cmake
+++ b/cmake/utils.cmake
@ -206,7 +206,7 @@ function(mindspore_add_pkg pkg_name )

    set(options )
    set(oneValueArgs URL MD5 GIT_REPOSITORY GIT_TAG VER EXE DIR HEAD_ONLY CMAKE_PATH RELEASE LIB_PATH CUSTOM_CMAKE)
-    set(multiValueArgs CMAKE_OPTION LIBS PRE_CONFIGURE_COMMAND CONFIGURE_COMMAND BUILD_OPTION INSTALL_INCS INSTALL_LIBS PATCHES SUBMODULES SOURCEMODULES)
+    set(multiValueArgs CMAKE_OPTION LIBS PRE_CONFIGURE_COMMAND CONFIGURE_COMMAND BUILD_OPTION INSTALL_INCS INSTALL_LIBS PATCHES SUBMODULES SOURCEMODULES ONLY_MAKE ONLY_MAKE_INCS ONLY_MAKE_LIBS)
    cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} )

    if (NOT PKG_LIB_PATH)
@ -290,7 +290,7 @@ function(mindspore_add_pkg pkg_name )
    foreach(_PATCH_FILE ${PKG_PATCHES})
        get_filename_component(_PATCH_FILE_NAME ${_PATCH_FILE} NAME)
        set(_LF_PATCH_FILE ${CMAKE_BINARY_DIR}/_ms_patch/${_PATCH_FILE_NAME})
-        configure_file(${_PATCH_FILE} ${_LF_PATCH_FILE} NEWLINE_STYLE LF)
+        configure_file(${_PATCH_FILE} ${_LF_PATCH_FILE} NEWLINE_STYLE LF @ONLY)

        message("patching ${${pkg_name}_SOURCE_DIR} -p1 < ${_LF_PATCH_FILE}")
        execute_process(COMMAND ${Patch_EXECUTABLE} -p1 INPUT_FILE ${_LF_PATCH_FILE}
@ -324,6 +324,16 @@ function(mindspore_add_pkg pkg_name )
                target_include_directories(${pkg_name} INTERFACE ${${pkg_name}_INC})
            endif ()

+        elseif (PKG_ONLY_MAKE)
+            __exec_cmd(COMMAND ${CMAKE_MAKE_PROGRAM} ${${pkg_name}_CXXFLAGS} -j${THNUM}
+                    WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR})
+            set(PKG_INSTALL_INCS ${PKG_ONLY_MAKE_INCS})
+            set(PKG_INSTALL_LIBS ${PKG_ONLY_MAKE_LIBS})
+            file(GLOB ${pkg_name}_INSTALL_INCS ${${pkg_name}_SOURCE_DIR}/${PKG_INSTALL_INCS})
+            file(GLOB ${pkg_name}_INSTALL_LIBS ${${pkg_name}_SOURCE_DIR}/${PKG_INSTALL_LIBS})
+            file(COPY ${${pkg_name}_INSTALL_INCS} DESTINATION ${${pkg_name}_BASE_DIR}/include)
+            file(COPY ${${pkg_name}_INSTALL_LIBS} DESTINATION ${${pkg_name}_BASE_DIR}/lib)
+
        elseif (PKG_CMAKE_OPTION)
            # in cmake
            file(MAKE_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build)
--- a/docker/mindspore-cpu/0.5.0-beta/Dockerfile
+++ b/docker/mindspore-cpu/0.5.0-beta/Dockerfile
@ -0,0 +1,67 @@
+FROM ubuntu:18.04
+
+MAINTAINER leonwanghui <leon.wanghui@huawei.com>
+
+# Set env
+ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
+ENV PATH /usr/local/bin:$PATH
+
+# Install base tools
+RUN apt update \
+    && DEBIAN_FRONTEND=noninteractive apt install -y \
+    vim \
+    wget \
+    curl \
+    xz-utils \
+    net-tools \
+    openssh-client \
+    git \
+    ntpdate \
+    tzdata \
+    tcl \
+    sudo \
+    bash-completion
+
+# Install compile tools
+RUN DEBIAN_FRONTEND=noninteractive apt install -y \
+    gcc \
+    g++ \
+    zlibc \
+    make \
+    libgmp-dev \
+    patch \
+    autoconf \
+    libtool \
+    automake \
+    flex
+
+# Set bash
+RUN echo "dash dash/sh boolean false" | debconf-set-selections
+RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash
+
+# Install python (v3.7.5)
+RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
+    libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \
+    && cd /tmp \
+    && wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \
+    && tar -xvf v3.7.5.tar.gz \
+    && cd /tmp/cpython-3.7.5 \
+    && mkdir -p ${PYTHON_ROOT_PATH} \
+    && ./configure --prefix=${PYTHON_ROOT_PATH} \
+    && make -j4 \
+    && make install -j4 \
+    && rm -f /usr/local/bin/python \
+    && rm -f /usr/local/bin/pip \
+    && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
+    && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
+    && rm -rf /tmp/cpython-3.7.5 \
+    && rm -f /tmp/v3.7.5.tar.gz
+
+# Set pip source
+RUN mkdir -pv /root/.pip \
+    && echo "[global]" > /root/.pip/pip.conf \
+    && echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \
+    && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf
+
+# Install MindSpore cpu whl package
+RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.5.0-beta/MindSpore/cpu/ubuntu_x86/mindspore-0.5.0-cp37-cp37m-linux_x86_64.whl
--- a/docker/mindspore-gpu/0.5.0-beta/Dockerfile
+++ b/docker/mindspore-gpu/0.5.0-beta/Dockerfile
@ -0,0 +1,83 @@
+FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04
+
+MAINTAINER leonwanghui <leon.wanghui@huawei.com>
+
+# Set env
+ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
+ENV OMPI_ROOT_PATH /usr/local/openmpi-3.1.5
+ENV PATH ${OMPI_ROOT_PATH}/bin:/usr/local/bin:$PATH
+ENV LD_LIBRARY_PATH ${OMPI_ROOT_PATH}/lib:$LD_LIBRARY_PATH
+
+# Install base tools
+RUN apt update \
+    && DEBIAN_FRONTEND=noninteractive apt install -y \
+    vim \
+    wget \
+    curl \
+    xz-utils \
+    net-tools \
+    openssh-client \
+    git \
+    ntpdate \
+    tzdata \
+    tcl \
+    sudo \
+    bash-completion
+
+# Install compile tools
+RUN DEBIAN_FRONTEND=noninteractive apt install -y \
+    gcc \
+    g++ \
+    zlibc \
+    make \
+    libgmp-dev \
+    patch \
+    autoconf \
+    libtool \
+    automake \
+    flex \
+    libnccl2=2.4.8-1+cuda10.1 \
+    libnccl-dev=2.4.8-1+cuda10.1
+
+# Set bash
+RUN echo "dash dash/sh boolean false" | debconf-set-selections
+RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash
+
+# Install python (v3.7.5)
+RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
+    libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \
+    && cd /tmp \
+    && wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \
+    && tar -xvf v3.7.5.tar.gz \
+    && cd /tmp/cpython-3.7.5 \
+    && mkdir -p ${PYTHON_ROOT_PATH} \
+    && ./configure --prefix=${PYTHON_ROOT_PATH} \
+    && make -j4 \
+    && make install -j4 \
+    && rm -f /usr/local/bin/python \
+    && rm -f /usr/local/bin/pip \
+    && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
+    && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
+    && rm -rf /tmp/cpython-3.7.5 \
+    && rm -f /tmp/v3.7.5.tar.gz
+
+# Set pip source
+RUN mkdir -pv /root/.pip \
+    && echo "[global]" > /root/.pip/pip.conf \
+    && echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \
+    && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf
+
+# Install openmpi (v3.1.5)
+RUN cd /tmp \
+    && wget https://download.open-mpi.org/release/open-mpi/v3.1/openmpi-3.1.5.tar.gz \
+    && tar -xvf openmpi-3.1.5.tar.gz \
+    && cd /tmp/openmpi-3.1.5 \
+    && mkdir -p ${OMPI_ROOT_PATH} \
+    && ./configure --prefix=${OMPI_ROOT_PATH} \
+    && make -j4 \
+    && make install -j4 \
+    && rm -rf /tmp/openmpi-3.1.5 \
+    && rm -f /tmp/openmpi-3.1.5.tar.gz
+
+# Install MindSpore cuda-10.1 whl package
+RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.5.0-beta/MindSpore/gpu/ubuntu_x86/cuda-10.1/mindspore_gpu-0.5.0-cp37-cp37m-linux_x86_64.whl
--- a/example/nlp_to_mindrecord/CLUERNER2020/README.md
+++ b/example/nlp_to_mindrecord/CLUERNER2020/README.md
@ -1,82 +0,0 @@
-# Guideline to Convert Training Data CLUERNER2020 to MindRecord For Bert Fine Tuning
-
-<!-- TOC -->
-
- [What does the example do](#what-does-the-example-do)
- [How to use the example to process CLUERNER2020](#how-to-use-the-example-to-process-cluerner2020)
-    - [Download CLUERNER2020 and unzip](#download-cluerner2020-and-unzip)
-    - [Generate MindRecord](#generate-mindrecord)
-    - [Create MindDataset By MindRecord](#create-minddataset-by-mindrecord)
-
-
-<!-- /TOC -->
-
-## What does the example do
-
-This example takes the [CLUERNER2020](https://www.cluebenchmarks.com/introduce.html) training data and converts it into MindRecord files, which are then used for the Bert fine-tuning process.
-
-1.  run.sh: the entry script that generates the MindRecord files.
-2.  run_read.sh: the entry script that creates a MindDataset from the MindRecord files.
-    - create_dataset.py: use MindDataset to read MindRecord to generate dataset.
-
-## How to use the example to process CLUERNER2020
-
-Download CLUERNER2020, convert it to MindRecord, use MindDataset to read MindRecord.
-
-### Download CLUERNER2020 and unzip
-
-1. Download the training data zip.
-    > [CLUERNER2020 dataset download address](https://www.cluebenchmarks.com/introduce.html) **-> 任务介绍 -> CLUENER 细粒度命名实体识别 -> cluener下载链接**
-
-2. Unzip the training data to the directory example/nlp_to_mindrecord/CLUERNER2020/data/cluener_public.
-    ```
-    unzip -d {your-mindspore}/example/nlp_to_mindrecord/CLUERNER2020/data/cluener_public cluener_public.zip
-    ```
-
-### Generate MindRecord
-
-1. Run the run.sh script.
-    ```bash
-    bash run.sh
-    ```
-
-2. The output looks like this:
-    ```
-    ...
-    [INFO] ME(17603:139620983514944,MainProcess):2020-04-28-16:56:12.498.235 [mindspore/mindrecord/filewriter.py:313] The list of mindrecord files created are: ['data/train.mindrecord'], and the list of index files are: ['data/train.mindrecord.db']
-    ...
-    [INFO] ME(17603,python):2020-04-28-16:56:13.400.175 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully.
-    [INFO] ME(17603,python):2020-04-28-16:56:13.400.863 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully.
-    [INFO] ME(17603,python):2020-04-28-16:56:13.401.534 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully.
-    [INFO] ME(17603,python):2020-04-28-16:56:13.402.179 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully.
-    [INFO] ME(17603,python):2020-04-28-16:56:13.402.702 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully.
-    ...
-    [INFO] ME(17603:139620983514944,MainProcess):2020-04-28-16:56:13.431.208 [mindspore/mindrecord/filewriter.py:313] The list of mindrecord files created are: ['data/dev.mindrecord'], and the list of index files are: ['data/dev.mindrecord.db']
-    ```
-
-3. The generated files look like this:
-    ```bash
-    $ ls output/
-    dev.mindrecord  dev.mindrecord.db  README.md  train.mindrecord  train.mindrecord.db
-    ```
-
-### Create MindDataset By MindRecord
-
-1. Run the run_read.sh script.
-    ```bash
-    bash run_read.sh
-    ```
-
-2. The output looks like this:
-    ```
-    ...
-    example 1340: input_ids: [ 101 3173 1290 4852 7676 3949  122 3299  123  126 3189 4510 8020 6381 5442 7357 2590 3636 8021 7676 3949 4294 1166 6121 3124 1277 6121 3124 7270 2135 3295 5789 3326 123  126 3189 1355 6134 1093 1325 3173 2399 6590 6791 8024  102    0    0    0    0    0    0    0    0    0    0   0    0    0    0    0    0    0    0]
-    example 1340: input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1  1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
-    example 1340: segment_ids: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
-    example 1340: label_ids: [ 0 18 19 20  2  4  0  0  0  0  0  0  0 34 36 26 27 28  0 34 35 35 35 35 35 35 35 35 35 36 26 27 28  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
-    example 1341: input_ids: [ 101 1728  711 4293 3868 1168 2190 2150 3791  934 3633 3428 4638 6237 7025 8024 3297 1400 5310 3362 6206 5023 5401 1744 3297 7770 3791 7368  976 1139 1104 2137  511 102    0    0    0    0    0    0    0    0   0    0    0    0    0    0    0    0    0    0    0    0    0    0   0    0    0    0    0    0    0    0]
-    example 1341: input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
-    example 1341: segment_ids: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
-   example 1341: label_ids: [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 18 19 19 19 19 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
-    ...
-    ```
--- a/example/nlp_to_mindrecord/CLUERNER2020/create_dataset.py
+++ b/example/nlp_to_mindrecord/CLUERNER2020/create_dataset.py
@ -1,36 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""create MindDataset by MindRecord"""
-import mindspore.dataset as ds
-
-def create_dataset(data_file):
-    """create MindDataset"""
-    num_readers = 4
-    data_set = ds.MindDataset(dataset_file=data_file, num_parallel_workers=num_readers, shuffle=True)
-    index = 0
-    for item in data_set.create_dict_iterator():
-        # print("example {}: {}".format(index, item))
-        print("example {}: input_ids: {}".format(index, item['input_ids']))
-        print("example {}: input_mask: {}".format(index, item['input_mask']))
-        print("example {}: segment_ids: {}".format(index, item['segment_ids']))
-        print("example {}: label_ids: {}".format(index, item['label_ids']))
-        index += 1
-        if index % 1000 == 0:
-            print("read rows: {}".format(index))
-    print("total rows: {}".format(index))
-
-if __name__ == '__main__':
-    create_dataset('output/train.mindrecord')
-    create_dataset('output/dev.mindrecord')
--- a/example/nlp_to_mindrecord/CLUERNER2020/data/.gitignore
+++ b/example/nlp_to_mindrecord/CLUERNER2020/data/.gitignore
@ -1 +0,0 @@
-cluener_public
--- a/example/nlp_to_mindrecord/CLUERNER2020/output/README.md
+++ b/example/nlp_to_mindrecord/CLUERNER2020/output/README.md
@ -1 +0,0 @@
-## output dir
--- a/example/nlp_to_mindrecord/CLUERNER2020/run.sh
+++ b/example/nlp_to_mindrecord/CLUERNER2020/run.sh
@ -1,40 +0,0 @@
-#!/bin/bash
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-
-rm -f output/train.mindrecord*
-rm -f output/dev.mindrecord*
-
-if [ ! -d "../../../third_party/to_mindrecord/CLUERNER2020" ]; then
-    echo "The patch base dir ../../../third_party/to_mindrecord/CLUERNER2020 is not exist."
-    exit 1
-fi
-
-if [ ! -f "../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch" ]; then
-    echo "The patch file ../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch is not exist."
-    exit 1
-fi
-
-# patch for data_processor_seq.py
-patch -p0 -d ../../../third_party/to_mindrecord/CLUERNER2020/ -o data_processor_seq_patched.py < ../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch
-if [ $? -ne 0 ]; then
-    echo "Patch ../../../third_party/to_mindrecord/CLUERNER2020/data_processor_seq.py failed"
-    exit 1
-fi
-
-# use patched script
-python ../../../third_party/to_mindrecord/CLUERNER2020/data_processor_seq_patched.py \
--vocab_file=../../../third_party/to_mindrecord/CLUERNER2020/vocab.txt \
--label2id_file=../../../third_party/to_mindrecord/CLUERNER2020/label2id.json
--- a/example/nlp_to_mindrecord/aclImdb_preprocess/data/README.md
+++ b/example/nlp_to_mindrecord/aclImdb_preprocess/data/README.md
@ -1 +0,0 @@
-## The input dataset
--- a/example/nlp_to_mindrecord/enwiki/README.md
+++ b/example/nlp_to_mindrecord/enwiki/README.md
@ -1,173 +0,0 @@
-# Guideline to Convert Training Data enwiki to MindRecord For Bert Pre Training
-
-<!-- TOC -->
-
- [What does the example do](#what-does-the-example-do)
- [How to use the example to process enwiki](#how-to-use-the-example-to-process-enwiki)
-    - [Download enwiki training data](#download-enwiki-training-data)
-    - [Process the enwiki](#process-the-enwiki)
-    - [Generate MindRecord](#generate-mindrecord)
-    - [Create MindDataset By MindRecord](#create-minddataset-by-mindrecord)
-
-
-<!-- /TOC -->
-
-## What does the example do
-
-This example takes the [enwiki](https://dumps.wikimedia.org/enwiki) training data and converts it into MindRecord files, which are then used for Bert network training.
-
-1.  run.sh: the entry script that generates the MindRecord files.
-2.  run_read.sh: the entry script that creates a MindDataset from the MindRecord files.
-    - create_dataset.py: use MindDataset to read MindRecord to generate dataset.
-
-## How to use the example to process enwiki
-
-Download enwiki data, process it, convert it to MindRecord, use MindDataset to read MindRecord.
-
-### Download enwiki training data
-
-> [enwiki dataset download address](https://dumps.wikimedia.org/enwiki) **-> 20200501 -> enwiki-20200501-pages-articles-multistream.xml.bz2**
-
-### Process the enwiki
-
-1. Please follow the steps in [process enwiki](https://github.com/mlperf/training/tree/master/language_model/tensorflow/bert)
- All rights for this step belong to the website at the linked address.
-
-### Generate MindRecord
-
-1. Run the run.sh script.
-    ```
-    bash run.sh input_dir output_dir vocab_file
-    ```
-    - input_dir: the directory which contains files like 'part-00251-of-00500'.
-    - output_dir: the directory in which the output mindrecord files will be stored.
-    - vocab_file: the vocab file, which you can download from another open-source project.
-
-2. The output looks like this:
-    ```
-    ...
-    Begin preprocess Wed Jun 10 09:21:23 CST 2020
-    Begin preprocess input file: /mnt/data/results/part-00000-of-00500
-    Begin output file: part-00000-of-00500.mindrecord
-    Total task: 510, processing: 1
-    Begin preprocess input file: /mnt/data/results/part-00001-of-00500
-    Begin output file: part-00001-of-00500.mindrecord
-    Total task: 510, processing: 2
-    Begin preprocess input file: /mnt/data/results/part-00002-of-00500
-    Begin output file: part-00002-of-00500.mindrecord
-    Total task: 510, processing: 3
-    Begin preprocess input file: /mnt/data/results/part-00003-of-00500
-    Begin output file: part-00003-of-00500.mindrecord
-    Total task: 510, processing: 4
-    Begin preprocess input file: /mnt/data/results/part-00004-of-00500
-    Begin output file: part-00004-of-00500.mindrecord
-    Total task: 510, processing: 4
-    ...
-    ```
-
-3. The generated files look like this:
-    ```bash
-    $ ls {your_output_dir}/
-    part-00000-of-00500.mindrecord part-00000-of-00500.mindrecord.db part-00001-of-00500.mindrecord part-00001-of-00500.mindrecord.db part-00002-of-00500.mindrecord part-00002-of-00500.mindrecord.db ...
-    ```
-
-### Create MindDataset By MindRecord
-
-1. Run the run_read.sh script.
-    ```bash
-    bash run_read.sh input_dir
-    ```
-    - input_dir: the directory which contains mindrecord files.
-
-2. The output looks like this:
-    ```
-    ...
-    example 633: input_ids: [  101  2043 19781  4305  2140  4520  2041  1010   103  2034  2455  2002
-      7879  2003  1996  2455  1997   103 26378  4160  1012   102  7291  2001
-      1996   103  1011  2343  1997  6327  1010  3423  1998   103  4262  2005
-      1996  2118  1997  2329  3996   103   102     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0]
-    example 633: input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
-     1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
-    example 633: segment_ids: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
-     1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
-    example 633: masked_lm_positions: [ 8 17 20 25 33 41  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
-      0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
-      0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
-      0  0  0  0]
-    example 633: masked_lm_ids: [ 1996 16137  1012  3580  2451  1012     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0     0     0     0     0     0     0     0     0
-         0     0     0     0]
-    example 633: masked_lm_weights: [1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
-     0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
-     0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
-     0. 0. 0. 0.]
-    example 633: next_sentence_labels: [1]
-    ...
-    ```
--- a/example/nlp_to_mindrecord/enwiki/create_dataset.py
+++ b/example/nlp_to_mindrecord/enwiki/create_dataset.py
@ -1,43 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""create MindDataset by MindRecord"""
-import argparse
-import mindspore.dataset as ds
-
-def create_dataset(data_file):
-    """create MindDataset"""
-    num_readers = 4
-    data_set = ds.MindDataset(dataset_file=data_file, num_parallel_workers=num_readers, shuffle=True)
-    index = 0
-    for item in data_set.create_dict_iterator():
-        # print("example {}: {}".format(index, item))
-        print("example {}: input_ids: {}".format(index, item['input_ids']))
-        print("example {}: input_mask: {}".format(index, item['input_mask']))
-        print("example {}: segment_ids: {}".format(index, item['segment_ids']))
-        print("example {}: masked_lm_positions: {}".format(index, item['masked_lm_positions']))
-        print("example {}: masked_lm_ids: {}".format(index, item['masked_lm_ids']))
-        print("example {}: masked_lm_weights: {}".format(index, item['masked_lm_weights']))
-        print("example {}: next_sentence_labels: {}".format(index, item['next_sentence_labels']))
-        index += 1
-        if index % 1000 == 0:
-            print("read rows: {}".format(index))
-    print("total rows: {}".format(index))
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    parser.add_argument("-i", "--input_file", nargs='+', type=str, help='Input mindreord file')
-    args = parser.parse_args()
-
-    create_dataset(args.input_file)
--- a/example/nlp_to_mindrecord/enwiki/run.sh
+++ b/example/nlp_to_mindrecord/enwiki/run.sh
@ -1,133 +0,0 @@
-#!/bin/bash
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-
-if [ $# -ne 3 ]; then
-    echo "Usage: $0 input_dir output_dir vocab_file"
-    exit 1
-fi
-
-if [ ! -d $1 ]; then
-    echo "The input dir: $1 is not exist."
-    exit 1
-fi
-
-if [ ! -d $2 ]; then
-    echo "The output dir: $2 is not exist."
-    exit 1
-fi
-rm -fr $2/*.mindrecord*
-
-if [ ! -f $3 ]; then
-    echo "The vocab file: $3 is not exist."
-    exit 1
-fi
-
-data_dir=$1
-output_dir=$2
-vocab_file=$3
-file_list=()
-output_filename=()
-file_index=0
-
-function getdir() {
-    elements=`ls $1`
-    for element in ${elements[*]};
-    do
-        dir_or_file=$1"/"$element
-        if [ -d $dir_or_file ];
-        then
-            getdir $dir_or_file
-        else
-            file_list[$file_index]=$dir_or_file
-            echo "${dir_or_file}" | tr '/' '\n' > dir_file_list.txt   # dir dir file to mapfile
-            mapfile parent_dir < dir_file_list.txt
-            rm dir_file_list.txt >/dev/null 2>&1
-            tmp_output_filename=${parent_dir[${#parent_dir[@]}-1]}".mindrecord"
-            output_filename[$file_index]=`echo ${tmp_output_filename} | sed 's/ //g'`
-            file_index=`expr $file_index + 1`
-        fi
-    done
-}
-
-getdir "${data_dir}"
-# echo "The input files: "${file_list[@]}
-# echo "The output files: "${output_filename[@]}
-
-if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then
-    echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist."
-    exit 1
-fi
-
-if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
-    echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
-    exit 1
-fi
-
-# patch for create_pretraining_data.py
-patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
-if [ $? -ne 0 ]; then
-    echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
-    exit 1
-fi
-
-# get the cpu core count
-num_cpu_core=`cat /proc/cpuinfo | grep "processor" | wc -l`
-avaiable_core_size=`expr $num_cpu_core / 3 \* 2`
-
-echo "Begin preprocess `date`"
-
-# using patched script to generate mindrecord
-file_list_len=`expr ${#file_list[*]} - 1`
-for index in $(seq 0 $file_list_len); do
-    echo "Begin preprocess input file: ${file_list[$index]}"
-    echo "Begin output file: ${output_filename[$index]}"
-    python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
-        --input_file=${file_list[$index]} \
-        --output_file=${output_dir}/${output_filename[$index]} \
-        --partition_number=1 \
-        --vocab_file=${vocab_file} \
-        --do_lower_case=True \
-        --max_seq_length=512 \
-        --max_predictions_per_seq=76 \
-        --masked_lm_prob=0.15 \
-        --random_seed=12345 \
-        --dupe_factor=10 >/tmp/${output_filename[$index]}.log 2>&1 &
-    process_count=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l`
-    echo "Total task: ${#file_list[*]}, processing: ${process_count}"
-    if [ $process_count -ge $avaiable_core_size ]; then
-        while [ 1 ]; do
-            process_num=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l`
-            if [ $process_count -gt $process_num ]; then
-                process_count=$process_num
-                break;
-            fi
-            sleep 2
-        done
-    fi
-done
-
-process_num=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l`
-while [ 1 ]; do
-    if [ $process_num -eq 0 ]; then
-        break;
-    fi
-    echo "There are still ${process_num} preprocess running ..."
-    sleep 2
-    process_num=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l`
-done
-
-echo "Preprocess all the data success."
-echo "End preprocess `date`"
--- a/example/nlp_to_mindrecord/zhwiki/README.md
+++ b/example/nlp_to_mindrecord/zhwiki/README.md
@ -1,113 +0,0 @@
-# Guideline to Convert Training Data zhwiki to MindRecord For Bert Pre Training
-
-<!-- TOC -->
-
- [What does the example do](#what-does-the-example-do)
- [Run simple test](#run-simple-test)
- [How to use the example to process zhwiki](#how-to-use-the-example-to-process-zhwiki)
-    - [Download zhwiki training data](#download-zhwiki-training-data)
-    - [Extract the zhwiki](#extract-the-zhwiki)
-    - [Generate MindRecord](#generate-mindrecord)
-    - [Create MindDataset By MindRecord](#create-minddataset-by-mindrecord)
-
-
-<!-- /TOC -->
-
-## What does the example do
-
-This example takes the [zhwiki](https://dumps.wikimedia.org/zhwiki) training data and converts it into MindRecord files, which are then used for Bert network training.
-
-1.  run.sh: the entry script that generates the MindRecord files.
-2.  run_read.sh: the entry script that creates a MindDataset from the MindRecord files.
-    - create_dataset.py: use MindDataset to read MindRecord to generate dataset.
-
-## Run simple test
-
-Follow these steps:
-
-```bash
-bash run_simple.sh         # generate output/simple.mindrecord* by ../../../third_party/to_mindrecord/zhwiki/sample_text.txt
-bash run_read_simple.sh    # use MindDataset to read output/simple.mindrecord*
-```
-
-## How to use the example to process zhwiki
-
-Download zhwiki data, extract it, convert it to MindRecord, use MindDataset to read MindRecord.
-
-### Download zhwiki training data
-
-> [zhwiki dataset download address](https://dumps.wikimedia.org/zhwiki) **-> 20200401 -> zhwiki-20200401-pages-articles-multistream.xml.bz2**
-
- Put the zhwiki-20200401-pages-articles-multistream.xml.bz2 file in the {your-mindspore}/example/nlp_to_mindrecord/zhwiki/data directory.
-
-### Extract the zhwiki
-
-1. Download the [wikiextractor](https://github.com/attardi/wikiextractor) script to the {your-mindspore}/example/nlp_to_mindrecord/zhwiki/data directory.
-
-    ```
-    $ ls data/
-    README.md  wikiextractor  zhwiki-20200401-pages-articles-multistream.xml.bz2
-    ```
-
-2. Extract the zhwiki.
-    ```python
-    python data/wikiextractor/WikiExtractor.py data/zhwiki-20200401-pages-articles-multistream.xml.bz2 --processes 4 --templates data/template --bytes 8M --min_text_length 0 --filter_disambig_pages --output data/extract
-    ```
-
-3. The extracted output looks like this:
-    ```
-    $ ls data/extract
-    AA AB
-    ```
-
-### Generate MindRecord
-
-1. Run the run.sh script.
-    ```
-    bash run.sh
-    ```
-    > Caution: This process may be slow; please wait patiently. If your machine does not have enough memory and CPU, it is recommended that you modify the script to generate the mindrecord files step by step.
-
-2. The output looks like this:
-    ```
-    patching file create_pretraining_data_patched.py (read from create_pretraining_data.py)
-    Begin preprocess input file: ./data/extract/AA/wiki_00
-    Begin output file: AAwiki_00.mindrecord
-    Total task: 5, processing: 1
-    Begin preprocess input file: ./data/extract/AA/wiki_01
-    Begin output file: AAwiki_01.mindrecord
-    Total task: 5, processing: 2
-    Begin preprocess input file: ./data/extract/AA/wiki_02
-    Begin output file: AAwiki_02.mindrecord
-    Total task: 5, processing: 3
-    Begin preprocess input file: ./data/extract/AB/wiki_02
-    Begin output file: ABwiki_02.mindrecord
-    Total task: 5, processing: 4
-    ...
-    ```
-
-3. The generated files look like this:
-    ```bash
-    $ ls output/
-    AAwiki_00.mindrecord AAwiki_00.mindrecord.db AAwiki_01.mindrecord AAwiki_01.mindrecord.db AAwiki_02.mindrecord AAwiki_02.mindrecord.db ... ABwiki_00.mindrecord ABwiki_00.mindrecord.db ...
-    ```
-
-### Create MindDataset By MindRecord
-
-1. Run the run_read.sh script.
-    ```bash
-    bash run_read.sh
-    ```
-
-2. The output looks like this:
-    ```
-    ...
-    example 74: input_ids: [  101  8168   118 12847  8783  9977 15908   117  8256  9245 11643  8168  8847  8588 11575  8154  8228   143  8384  8376  9197 10241   103 10564 11421  8199 12268   112   161  8228 11541  9586  8436  8174  8363  9864  9702   103   103   119   103  9947 10564   103  8436  8806 11479   103  8912   119   103   103   103 12209  8303   103  8757  8824   117  8256   103  8619  8168 11541   102 11684  8196   103  8228  8847 11523   117  9059  9064 12410  8358  8181 10764   117 11167 11706  9920   148  8332 11390  8936  8205 10951 11997   103  8154   117   103  8670 10467   112   161 10951 13139 12413   117 10288   143 10425  8205   152 10795  8472  8196   103   161 12126  9172 13129 12106  8217  8174 12244  8205   143   103  8461  8277 10628   160  8221   119   102]
-    example 74: input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
-    example 74: segment_ids: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
-    example 74: masked_lm_positions: [  6  22  37  38  40  43  47  50  51  52  55  60  67  76  89  92  98 109 120   0]
-    example 74: masked_lm_ids: [ 8118  8165  8329  8890  8554  8458   119  8850  8565 10392  8174 11467  10291  8181  8549 12718 13139   112   158     0]
-    example 74: masked_lm_weights: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.]
-    example 74: next_sentence_labels: [0]
-    ...
-    ```
--- a/example/nlp_to_mindrecord/zhwiki/create_dataset.py
+++ b/example/nlp_to_mindrecord/zhwiki/create_dataset.py
@ -1,43 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""create MindDataset by MindRecord"""
-import argparse
-import mindspore.dataset as ds
-
-def create_dataset(data_file):
-    """Open data_file as a shuffled MindDataset and print each row's fields, then the total row count."""
-    num_readers = 4  # passed to MindDataset as num_parallel_workers
-    data_set = ds.MindDataset(dataset_file=data_file, num_parallel_workers=num_readers, shuffle=True)
-    index = 0  # rows printed so far
-    for item in data_set.create_dict_iterator():  # item is indexed by column name below
-        # print("example {}: {}".format(index, item))
-        print("example {}: input_ids: {}".format(index, item['input_ids']))
-        print("example {}: input_mask: {}".format(index, item['input_mask']))
-        print("example {}: segment_ids: {}".format(index, item['segment_ids']))
-        print("example {}: masked_lm_positions: {}".format(index, item['masked_lm_positions']))
-        print("example {}: masked_lm_ids: {}".format(index, item['masked_lm_ids']))
-        print("example {}: masked_lm_weights: {}".format(index, item['masked_lm_weights']))
-        print("example {}: next_sentence_labels: {}".format(index, item['next_sentence_labels']))
-        index += 1
-        if index % 1000 == 0:  # progress line after every 1000 rows
-            print("read rows: {}".format(index))
-    print("total rows: {}".format(index))
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()  # the script takes a single option, -i/--input_file
-    parser.add_argument("-i", "--input_file", nargs='+', type=str, help='Input mindreord file')  # NOTE(review): "mindreord" looks like a typo for "mindrecord"
-    args = parser.parse_args()  # nargs='+' makes args.input_file a list of one or more paths
-
-    create_dataset(args.input_file)
--- a/example/nlp_to_mindrecord/zhwiki/data/.gitignore
+++ b/example/nlp_to_mindrecord/zhwiki/data/.gitignore
@ -1,3 +0,0 @@
-wikiextractor/
-zhwiki-20200401-pages-articles-multistream.xml.bz2
-extract/
--- a/example/nlp_to_mindrecord/zhwiki/data/README.md
+++ b/example/nlp_to_mindrecord/zhwiki/data/README.md
@ -1 +0,0 @@
-## The input dataset
--- a/example/nlp_to_mindrecord/zhwiki/output/README.md
+++ b/example/nlp_to_mindrecord/zhwiki/output/README.md
@ -1 +0,0 @@
-## Output the mindrecord
--- a/example/nlp_to_mindrecord/zhwiki/run.sh
+++ b/example/nlp_to_mindrecord/zhwiki/run.sh
@ -1,112 +0,0 @@
-#!/bin/bash
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-
-rm -f output/*.mindrecord*  # remove mindrecord files left in output/ before generating new ones
-
-data_dir="./data/extract"  # root directory walked by getdir
-file_list=()  # input file paths, filled by getdir
-output_filename=()  # output file name for the input stored at the same index
-file_index=0  # next free index in both arrays
-
-function getdir() {  # $1: directory to walk recursively; appends to file_list and output_filename
-    elements=`ls $1`
-    for element in ${elements[*]};
-    do
-        dir_or_file=$1"/"$element
-        if [ -d $dir_or_file ];
-        then
-            getdir $dir_or_file  # descend into the sub-directory
-        else
-            file_list[$file_index]=$dir_or_file
-            echo "${dir_or_file}" | tr '/' '\n' > dir_file_list.txt   # write one path component per line to a temp file for mapfile
-            mapfile parent_dir < dir_file_list.txt  # no -t, so every element keeps its trailing newline
-            rm dir_file_list.txt >/dev/null 2>&1
-            tmp_output_filename=${parent_dir[${#parent_dir[@]}-2]}${parent_dir[${#parent_dir[@]}-1]}".mindrecord"  # <last directory name><file name>.mindrecord
-            output_filename[$file_index]=`echo ${tmp_output_filename} | sed 's/ //g'`  # unquoted echo turns the kept newlines into spaces, sed removes them
-            file_index=`expr $file_index + 1`
-        fi
-    done
-}
-
-getdir "${data_dir}"  # collect every file below data_dir
-# echo "The input files: "${file_list[@]}
-# echo "The output files: "${output_filename[@]}
-
-if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then
-    echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist."
-    exit 1
-fi
-
-if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
-    echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
-    exit 1
-fi
-
-# apply the patch inside the -d directory; -o writes the result to create_pretraining_data_patched.py
-patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
-if [ $? -ne 0 ]; then
-    echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
-    exit 1
-fi
-
-# count the "processor" lines in /proc/cpuinfo
-num_cpu_core=`cat /proc/cpuinfo | grep "processor" | wc -l`
-avaiable_core_size=`expr $num_cpu_core / 3 \* 2`  # concurrency cap: integer (cores / 3) * 2
-
-echo "Begin preprocess `date`"
-
-# run the patched script once per input file, as background jobs
-file_list_len=`expr ${#file_list[*]} - 1`  # index of the last entry
-for index in $(seq 0 $file_list_len); do
-    echo "Begin preprocess input file: ${file_list[$index]}"
-    echo "Begin output file: ${output_filename[$index]}"
-    python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
-        --input_file=${file_list[$index]} \
-        --output_file=output/${output_filename[$index]} \
-        --partition_number=1 \
-        --vocab_file=../../../third_party/to_mindrecord/zhwiki/vocab.txt \
-        --do_lower_case=True \
-        --max_seq_length=128 \
-        --max_predictions_per_seq=20 \
-        --masked_lm_prob=0.15 \
-        --random_seed=12345 \
-        --dupe_factor=10 >/tmp/${output_filename[$index]}.log 2>&1 &   # user defined; stdout and stderr go to the /tmp log
-    process_count=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l`  # matching processes currently listed by ps
-    echo "Total task: ${#file_list[*]}, processing: ${process_count}"
-    if [ $process_count -ge $avaiable_core_size ]; then  # at the cap: poll until the count drops
-        while [ 1 ]; do
-            process_num=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l`
-            if [ $process_count -gt $process_num ]; then
-                process_count=$process_num
-                break;
-            fi
-            sleep 2
-        done
-    fi
-done
-
-process_num=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l`
-while [ 1 ]; do  # poll every 2 seconds until no matching process is left
-    if [ $process_num -eq 0 ]; then
-        break;
-    fi
-    echo "There are still ${process_num} preprocess running ..."
-    sleep 2
-    process_num=`ps -ef | grep create_pretraining_data_patched | grep -v grep | wc -l`
-done
-
-echo "Preprocess all the data success."  # NOTE(review): printed unconditionally; exit codes of the background jobs are never checked
-echo "End preprocess `date`"
--- a/example/nlp_to_mindrecord/zhwiki/run_simple.sh
+++ b/example/nlp_to_mindrecord/zhwiki/run_simple.sh
@ -1,47 +0,0 @@
-#!/bin/bash
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-
-rm -f output/simple.mindrecord*  # remove simple.mindrecord* files left in output/
-
-if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then
-    echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist."
-    exit 1
-fi
-
-if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
-    echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
-    exit 1
-fi
-
-# apply the patch inside the -d directory; -o writes the result to create_pretraining_data_patched.py
-patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
-if [ $? -ne 0 ]; then
-    echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
-    exit 1
-fi
-
-# run the patched script in the foreground on sample_text.txt, writing output/simple.mindrecord
-python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
--input_file=../../../third_party/to_mindrecord/zhwiki/sample_text.txt \
--output_file=output/simple.mindrecord \
--partition_number=4 \
--vocab_file=../../../third_party/to_mindrecord/zhwiki/vocab.txt \
--do_lower_case=True \
--max_seq_length=128 \
--max_predictions_per_seq=20 \
--masked_lm_prob=0.15 \
--random_seed=12345 \
--dupe_factor=10    # user defined
--- a/example/resnet50_cifar10/README.md
+++ b/example/resnet50_cifar10/README.md
@ -1,137 +0,0 @@
-# ResNet-50 Example
-
-## Description
-
-This is an example of training ResNet-50 with CIFAR-10 dataset in MindSpore.
-
-## Requirements
-
- Install [MindSpore](https://www.mindspore.cn/install/en).
-
- Download the dataset CIFAR-10
-
-> Unzip the CIFAR-10 dataset to any path you want and the folder structure should include train and eval dataset as follows:
-> ```
-> .  
-> ├── cifar-10-batches-bin  # train dataset
-> └── cifar-10-verify-bin   # infer dataset
-> ```
-
-
-## Example structure
-
-```shell
-.
-├── config.py                       # parameter configuration
-├── dataset.py                      # data preprocessing
-├── eval.py                         # infer script
-├── lr_generator.py                 # generate learning rate for each step
-├── run_distribute_train.sh         # launch distributed training(8 pcs)
-├── run_infer.sh                    # launch inference
-├── run_standalone_train.sh         # launch standalone training(1 pcs)
-└── train.py                        # train script
-```
-
-
-## Parameter configuration
-
-Parameters for both training and inference can be set in config.py.
-
-```
-"class_num": 10,                  # dataset class num
-"batch_size": 32,                 # batch size of input tensor
-"loss_scale": 1024,               # loss scale
-"momentum": 0.9,                  # momentum
-"weight_decay": 1e-4,             # weight decay 
-"epoch_size": 90,                 # only valid for training; it is always 1 for inference
-"buffer_size": 100,               # number of queue size in data preprocessing
-"image_height": 224,              # image height
-"image_width": 224,               # image width
-"save_checkpoint": True,          # whether save checkpoint or not
-"save_checkpoint_steps": 195,     # the step interval between two checkpoints. By default, the last checkpoint will be saved after the last step
-"keep_checkpoint_max": 10,        # only keep the last keep_checkpoint_max checkpoint
-"save_checkpoint_path": "./",     # path to save checkpoint
-"warmup_epochs": 5,               # number of warmup epoch
-"lr_decay_mode": "poly",          # decay mode can be selected from steps, poly and default
-"lr_init": 0.01,                  # initial learning rate
-"lr_end": 0.00001,                # final learning rate
-"lr_max": 0.1,                    # maximum learning rate
-```
-
-## Running the example
-
-### Train
-
-#### Usage
-
-```
-# distributed training
-Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH]
-
-# standalone training
-Usage: sh run_standalone_train.sh [DATASET_PATH]
-```
-
-
-#### Launch
-
-```
-# distribute training example
-sh run_distribute_train.sh rank_table.json ~/cifar-10-batches-bin
-
-# standalone training example
-sh run_standalone_train.sh ~/cifar-10-batches-bin
-```
-
-> About rank_table.json, you can refer to the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html).
-
-#### Result
-
-Training results will be stored in the example path, in a folder whose name begins with "train" or "train_parallel". In that folder you can find the checkpoint files together with results like the following in the log.
-
-```
-# distribute training result(8 pcs)
-epoch: 1 step: 195, loss is 1.9601055
-epoch: 2 step: 195, loss is 1.8555021
-epoch: 3 step: 195, loss is 1.6707983
-epoch: 4 step: 195, loss is 1.8162166
-epoch: 5 step: 195, loss is 1.393667
-```
-
-### Infer
-
-#### Usage
-
-```
-# infer
-Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]
-```
-
-#### Launch
-
-```
-# infer example
-sh run_infer.sh ~/cifar-10-verify-bin ~/resnet50_cifar10/train_parallel0/resnet-90_195.ckpt
-```
-
-> checkpoint can be produced in training process.
-
-#### Result
-
-Inference results will be stored in the example path, in a folder named "infer". In that folder you can find results like the following in the log.
-
-```
-result: {'acc': 0.91446314102564111} ckpt=~/resnet50_cifar10/train_parallel0/resnet-90_195.ckpt
-```
-
-### Running on GPU
-```
-# distributed training example
-mpirun -n 8 python train.py --dataset_path=~/cifar-10-batches-bin --device_target="GPU" --run_distribute=True
-
-# standalone training example
-python train.py --dataset_path=~/cifar-10-batches-bin --device_target="GPU"
-
-# infer example
-python eval.py --dataset_path=~/cifar-10-verify-bin --device_target="GPU" --checkpoint_path=resnet-90_195.ckpt
-```
--- a/example/resnet50_cifar10/dataset.py
+++ b/example/resnet50_cifar10/dataset.py
@ -1,81 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""
-create train or eval dataset.
-"""
-import os
-import mindspore.common.dtype as mstype
-import mindspore.dataset.engine as de
-import mindspore.dataset.transforms.vision.c_transforms as C
-import mindspore.dataset.transforms.c_transforms as C2
-from mindspore.communication.management import init, get_rank, get_group_size
-from config import config
-
-
-def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
-    """
-    Build the CIFAR-10 input pipeline: cast labels, transform images, batch, repeat.
-
-    Args:
-        dataset_path(string): the path of dataset, passed to de.Cifar10Dataset.
-        do_train(bool): if True, RandomCrop and RandomHorizontalFlip are applied before the common transforms.
-        repeat_num(int): the repeat times of dataset. Default: 1
-        batch_size(int): the batch size of dataset. Default: 32
-        target(str): "Ascend" reads DEVICE_NUM and RANK_ID from the environment; any other value calls init("nccl"). Default: Ascend
-
-    Returns:
-        dataset, batched with drop_remainder=True and repeated repeat_num times.
-    """
-    if target == "Ascend":
-        device_num = int(os.getenv("DEVICE_NUM"))  # int(None) raises TypeError when the variable is unset
-        rank_id = int(os.getenv("RANK_ID"))
-    else:
-        init("nccl")
-        rank_id = get_rank()
-        device_num = get_group_size()
-
-    if device_num == 1:  # single device: no sharding arguments
-        ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True)
-    else:
-        ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True,
-                               num_shards=device_num, shard_id=rank_id)
-
-    # image transforms; the two random ones are added only for training
-    trans = []
-    if do_train:
-        trans += [
-            C.RandomCrop((32, 32), (4, 4, 4, 4)),
-            C.RandomHorizontalFlip(prob=0.5)
-        ]
-
-    trans += [
-        C.Resize((config.image_height, config.image_width)),
-        C.Rescale(1.0 / 255.0, 0.0),
-        C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
-        C.HWC2CHW()
-    ]
-
-    type_cast_op = C2.TypeCast(mstype.int32)  # applied to the "label" column below
-
-    ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
-    ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
-
-    # apply batch operations
-    ds = ds.batch(batch_size, drop_remainder=True)
-
-    # apply dataset repeat operation
-    ds = ds.repeat(repeat_num)
-
-    return ds
--- a/example/resnet50_cifar10/eval.py
+++ b/example/resnet50_cifar10/eval.py
@ -1,72 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""
-eval.
-"""
-import os
-import argparse
-from dataset import create_dataset
-from config import config
-from mindspore import context
-from mindspore.model_zoo.resnet import resnet50
-from mindspore.parallel._auto_parallel_context import auto_parallel_context
-from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
-from mindspore.train.model import Model, ParallelMode
-from mindspore.train.serialization import load_checkpoint, load_param_into_net
-from mindspore.communication.management import init, get_group_size
-
-parser = argparse.ArgumentParser(description='Image classification')  # NOTE(review): the type=bool options below turn any non-empty string, including "False", into True
-parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute')
-parser.add_argument('--device_num', type=int, default=1, help='Device num.')
-parser.add_argument('--do_train', type=bool, default=False, help='Do train or not.')
-parser.add_argument('--do_eval', type=bool, default=True, help='Do eval or not.')
-parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
-parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
-parser.add_argument('--device_target', type=str, default='Ascend', help='Device target')
-args_opt = parser.parse_args()  # runs at module level, outside the __main__ guard
-
-if __name__ == '__main__':
-    target = args_opt.device_target
-    context.set_context(mode=context.GRAPH_MODE, device_target=target, save_graphs=False)
-    if not args_opt.do_eval and args_opt.run_distribute:  # not entered with the default do_eval=True
-        if target == "Ascend":
-            device_id = int(os.getenv('DEVICE_ID'))
-            context.set_context(device_id=device_id)
-            context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                              mirror_mean=True)
-            auto_parallel_context().set_all_reduce_fusion_split_indices([140])
-            init()
-        elif target == "GPU":
-            init("nccl")
-            context.set_auto_parallel_context(device_num=get_group_size(), parallel_mode=ParallelMode.DATA_PARALLEL,
-                                              mirror_mean=True)
-
-    epoch_size = config.epoch_size  # not referenced again in this script
-    net = resnet50(class_num=config.class_num)
-    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
-
-    if args_opt.do_eval:
-        dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, batch_size=config.batch_size,
-                                 target=target)
-        step_size = dataset.get_dataset_size()  # not referenced again in this script
-
-        if args_opt.checkpoint_path:  # weights are loaded only when a checkpoint path was given
-            param_dict = load_checkpoint(args_opt.checkpoint_path)
-            load_param_into_net(net, param_dict)
-        net.set_train(False)
-
-        model = Model(net, loss_fn=loss, metrics={'acc'})
-        res = model.eval(dataset)
-        print("result:", res, "ckpt=", args_opt.checkpoint_path)
--- a/example/resnet50_cifar10/train.py
+++ b/example/resnet50_cifar10/train.py
@ -1,97 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""train resnet50 on cifar10."""
-import os
-import argparse
-import numpy as np
-from dataset import create_dataset
-from lr_generator import get_lr
-from config import config
-from mindspore import context
-from mindspore import Tensor
-from mindspore.model_zoo.resnet import resnet50
-from mindspore.parallel._auto_parallel_context import auto_parallel_context
-from mindspore.nn.optim.momentum import Momentum
-from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
-
-from mindspore.train.model import Model, ParallelMode
-
-from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
-from mindspore.train.loss_scale_manager import FixedLossScaleManager
-from mindspore.communication.management import init, get_rank, get_group_size
-
-parser = argparse.ArgumentParser(description='Image classification')  # NOTE(review): the type=bool options below turn any non-empty string, including "False", into True
-parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute')
-parser.add_argument('--device_num', type=int, default=1, help='Device num.')
-parser.add_argument('--do_train', type=bool, default=True, help='Do train or not.')
-parser.add_argument('--do_eval', type=bool, default=False, help='Do eval or not.')
-parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
-parser.add_argument('--device_target', type=str, default='Ascend', help='Device target')
-args_opt = parser.parse_args()  # runs at module level, outside the __main__ guard
-
-
-if __name__ == '__main__':
-    target = args_opt.device_target
-    ckpt_save_dir = config.save_checkpoint_path
-    context.set_context(mode=context.GRAPH_MODE, device_target=target, save_graphs=False)
-    np.random.seed(1)
-    if not args_opt.do_eval and args_opt.run_distribute:  # distributed setup, only when --run_distribute is set
-        if target == "Ascend":
-            device_id = int(os.getenv('DEVICE_ID'))
-            context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False, device_id=device_id,
-                                enable_auto_mixed_precision=True)
-            init()
-            context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                              mirror_mean=True)
-            auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160])
-            ckpt_save_dir = config.save_checkpoint_path  # same value as the initial assignment above
-        elif target == "GPU":
-            context.set_context(mode=context.GRAPH_MODE, device_target="GPU", save_graphs=False)
-            init("nccl")
-            context.set_auto_parallel_context(device_num=get_group_size(), parallel_mode=ParallelMode.DATA_PARALLEL,
-                                              mirror_mean=True)
-            ckpt_save_dir = config.save_checkpoint_path + "ckpt_" + str(get_rank()) + "/"  # one checkpoint directory per rank
-    epoch_size = config.epoch_size
-    net = resnet50(class_num=config.class_num)
-
-    if args_opt.do_train:
-        dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True,
-                                 repeat_num=epoch_size, batch_size=config.batch_size, target=target)
-        step_size = dataset.get_dataset_size()
-
-        loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
-        lr = Tensor(get_lr(global_step=0, lr_init=config.lr_init, lr_end=config.lr_end, lr_max=config.lr_max,
-                           warmup_epochs=config.warmup_epochs, total_epochs=epoch_size, steps_per_epoch=step_size,
-                           lr_decay_mode='poly'))
-        opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
-                       config.weight_decay, config.loss_scale)
-        if target == 'GPU':
-            loss = SoftmaxCrossEntropyWithLogits(sparse=True, is_grad=False, reduction='mean')
-            opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum)  # replaces the optimizer above, without weight_decay and loss_scale
-            model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})
-        else:
-            loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
-            model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'},
-                          amp_level="O2", keep_batchnorm_fp32=False)
-
-        time_cb = TimeMonitor(data_size=step_size)
-        loss_cb = LossMonitor()
-        cb = [time_cb, loss_cb]
-        if config.save_checkpoint:
-            config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs*step_size,  # interval in steps = epochs * steps per epoch
-                                         keep_checkpoint_max=config.keep_checkpoint_max)
-            ckpt_cb = ModelCheckpoint(prefix="resnet", directory=ckpt_save_dir, config=config_ck)
-            cb += [ckpt_cb]
-        model.train(epoch_size, dataset, callbacks=cb)
--- a/example/resnet50_imagenet2012/README.md
+++ b/example/resnet50_imagenet2012/README.md
@ -1,150 +0,0 @@
-# ResNet-50 Example
-
-## Description
-
-This is an example of training ResNet-50 with ImageNet2012 dataset in MindSpore.
-
-## Requirements
-
- Install [MindSpore](https://www.mindspore.cn/install/en).
-
- Download the dataset ImageNet2012 
-
-> Unzip the ImageNet2012 dataset to any path you want and the folder structure should include train and eval dataset as follows:
-> ```
-> .  
-> ├── ilsvrc                  # train dataset
-> └── ilsvrc_eval             # infer dataset
-> ```
-
-
-## Example structure
-
-```shell
-.
-├── crossentropy.py                 # CrossEntropy loss function
-├── config.py                       # parameter configuration
-├── dataset.py                      # data preprocessing
-├── eval.py                         # infer script
-├── lr_generator.py                 # generate learning rate for each step
-├── run_distribute_train.sh         # launch distributed training(8 pcs)
-├── run_infer.sh                    # launch inference
-├── run_standalone_train.sh         # launch standalone training(1 pcs)
-└── train.py                        # train script
-```
-
-
-## Parameter configuration
-
-Parameters for both training and inference can be set in config.py.
-
-```
-"class_num": 1001,                # dataset class number
-"batch_size": 32,                 # batch size of input tensor
-"loss_scale": 1024,               # loss scale
-"momentum": 0.9,                  # momentum optimizer
-"weight_decay": 1e-4,             # weight decay 
-"epoch_size": 90,                 # only valid for training; it is always 1 for inference
-"pretrained_epoch_size": 1,       # epoch size that model has been trained before load pretrained checkpoint
-"buffer_size": 1000,              # number of queue size in data preprocessing
-"image_height": 224,              # image height
-"image_width": 224,               # image width
-"save_checkpoint": True,          # whether save checkpoint or not
-"save_checkpoint_epochs": 1,      # the epoch interval between two checkpoints. By default, the last checkpoint will be saved after the last epoch
-"keep_checkpoint_max": 10,        # only keep the last keep_checkpoint_max checkpoint
-"save_checkpoint_path": "./",     # path to save checkpoint relative to the executed path
-"warmup_epochs": 0,               # number of warmup epoch
-"lr_decay_mode": "cosine",        # decay mode for generating learning rate
-"label_smooth": True,             # label smooth
-"label_smooth_factor": 0.1,       # label smooth factor
-"lr_init": 0,                     # initial learning rate
-"lr_max": 0.1,                    # maximum learning rate
-```
-
-## Running the example
-
-### Train
-
-#### Usage
-
-```
-# distributed training
-Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)
-
-# standalone training
-Usage: sh run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)
-
-```
-
-
-#### Launch
-
-```bash
-# distributed training example(8 pcs)
-sh run_distribute_train.sh rank_table_8p.json dataset/ilsvrc
-
-# If you want to load pretrained ckpt file
-sh run_distribute_train.sh rank_table_8p.json dataset/ilsvrc ./pretrained.ckpt
-
-# standalone training example(1 pcs)
-sh run_standalone_train.sh dataset/ilsvrc
-
-# If you want to load pretrained ckpt file
-sh run_standalone_train.sh dataset/ilsvrc ./pretrained.ckpt
-```
-
-> About rank_table.json, you can refer to the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html).
-
-#### Result
-
-Training results will be stored in the example path, in a folder whose name begins with "train" or "train_parallel". In that folder you can find the checkpoint files together with results like the following in the log.
-
-```
-# distribute training result(8 pcs)
-epoch: 1 step: 5004, loss is 4.8995576
-epoch: 2 step: 5004, loss is 3.9235563
-epoch: 3 step: 5004, loss is 3.833077
-epoch: 4 step: 5004, loss is 3.2795618
-epoch: 5 step: 5004, loss is 3.1978393
-```
-
-### Infer
-
-#### Usage
-
-```
-# infer
-Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]
-```
-
-#### Launch
-
-```bash
-# infer with checkpoint
-sh run_infer.sh dataset/ilsvrc_eval train_parallel0/resnet-90_5004.ckpt
-```
-
-> checkpoint can be produced in training process.
-
-#### Result
-
-Inference results will be stored in the example path, in a folder named "infer". In that folder you can find results like the following in the log.
-
-```
-result: {'acc': 0.7671054737516005} ckpt=train_parallel0/resnet-90_5004.ckpt
-```
-
-### Running on GPU
-```
-# distributed training example
-mpirun -n 8 python train.py --dataset_path=dataset/ilsvrc/train --device_target="GPU" --run_distribute=True
-
-# standalone training example
-python train.py --dataset_path=dataset/ilsvrc/train --device_target="GPU"
-
-# standalone training example with pretrained checkpoint
-python train.py --dataset_path=dataset/ilsvrc/train --device_target="GPU" --pre_trained=pretrained.ckpt
-
-# infer example
-python eval.py --dataset_path=dataset/ilsvrc/val --device_target="GPU" --checkpoint_path=resnet-90_5004.ckpt
-```
--- a/example/resnet50_imagenet2012/dataset.py
+++ b/example/resnet50_imagenet2012/dataset.py
@ -1,85 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""
-create train or eval dataset.
-"""
-import os
-import mindspore.common.dtype as mstype
-import mindspore.dataset.engine as de
-import mindspore.dataset.transforms.vision.c_transforms as C
-import mindspore.dataset.transforms.c_transforms as C2
-from mindspore.communication.management import init, get_rank, get_group_size
-
-def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
-    """
-    create a train or eval dataset
-
-    Args:
-        dataset_path(string): the path of dataset.
-        do_train(bool): whether dataset is used for train or eval.
-        repeat_num(int): the repeat times of dataset. Default: 1
-        batch_size(int): the batch size of dataset. Default: 32
-        target(str): the device target. Default: Ascend
-
-    Returns:
-        dataset
-    """
-    if target == "Ascend":
-        device_num = int(os.getenv("DEVICE_NUM"))
-        rank_id = int(os.getenv("RANK_ID"))
-    else:
-        init("nccl")
-        rank_id = get_rank()
-        device_num = get_group_size()
-
-    if device_num == 1:
-        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
-    else:
-        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
-                                     num_shards=device_num, shard_id=rank_id)
-
-    image_size = 224
-    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
-    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
-
-    # define map operations
-    if do_train:
-        trans = [
-            C.RandomCropDecodeResize(image_size, scale=(0.08, 1.0), ratio=(0.75, 1.333)),
-            C.RandomHorizontalFlip(prob=0.5),
-            C.Normalize(mean=mean, std=std),
-            C.HWC2CHW()
-        ]
-    else:
-        trans = [
-            C.Decode(),
-            C.Resize((256, 256)),
-            C.CenterCrop(image_size),
-            C.Normalize(mean=mean, std=std),
-            C.HWC2CHW()
-        ]
-
-    type_cast_op = C2.TypeCast(mstype.int32)
-
-    ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
-    ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
-
-    # apply batch operations
-    ds = ds.batch(batch_size, drop_remainder=True)
-
-    # apply dataset repeat operation
-    ds = ds.repeat(repeat_num)
-
-    return ds
--- a/example/resnet50_imagenet2012/eval.py
+++ b/example/resnet50_imagenet2012/eval.py
@ -1,62 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""
-eval.
-"""
-import os
-import argparse
-from dataset import create_dataset
-from config import config
-from mindspore import context
-from mindspore.model_zoo.resnet import resnet50
-from mindspore.train.model import Model
-from mindspore.train.serialization import load_checkpoint, load_param_into_net
-from crossentropy import CrossEntropy
-
-parser = argparse.ArgumentParser(description='Image classification')
-parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute')
-parser.add_argument('--device_num', type=int, default=1, help='Device num.')
-parser.add_argument('--do_train', type=bool, default=False, help='Do train or not.')
-parser.add_argument('--do_eval', type=bool, default=True, help='Do eval or not.')
-parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
-parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
-parser.add_argument('--device_target', type=str, default='Ascend', help='Device target')
-args_opt = parser.parse_args()
-target = args_opt.device_target
-context.set_context(mode=context.GRAPH_MODE, device_target=target, save_graphs=False)
-if target == "Ascend":
-    device_id = int(os.getenv('DEVICE_ID'))
-    context.set_context(device_id=device_id)
-
-if __name__ == '__main__':
-
-    net = resnet50(class_num=config.class_num)
-    if not config.use_label_smooth:
-        config.label_smooth_factor = 0.0
-    loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
-
-    if args_opt.do_eval:
-        dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, batch_size=config.batch_size,
-                                 target=target)
-        step_size = dataset.get_dataset_size()
-
-        if args_opt.checkpoint_path:
-            param_dict = load_checkpoint(args_opt.checkpoint_path)
-            load_param_into_net(net, param_dict)
-        net.set_train(False)
-
-        model = Model(net, loss_fn=loss, metrics={'acc'})
-        res = model.eval(dataset)
-        print("result:", res, "ckpt=", args_opt.checkpoint_path)
--- a/example/resnet50_imagenet2012/train.py
+++ b/example/resnet50_imagenet2012/train.py
@ -1,122 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""train_imagenet."""
-import os
-import argparse
-import numpy as np
-from dataset import create_dataset
-from lr_generator import get_lr
-from config import config
-from mindspore import context
-from mindspore import Tensor
-from mindspore.model_zoo.resnet import resnet50
-from mindspore.parallel._auto_parallel_context import auto_parallel_context
-from mindspore.nn.optim.momentum import Momentum
-
-from mindspore.train.model import Model, ParallelMode
-
-from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
-from mindspore.train.loss_scale_manager import FixedLossScaleManager
-from mindspore.train.serialization import load_checkpoint, load_param_into_net
-from mindspore.communication.management import init, get_rank, get_group_size
-import mindspore.nn as nn
-import mindspore.common.initializer as weight_init
-from crossentropy import CrossEntropy
-
-parser = argparse.ArgumentParser(description='Image classification')
-parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute')
-parser.add_argument('--device_num', type=int, default=1, help='Device num.')
-parser.add_argument('--do_train', type=bool, default=True, help='Do train or not.')
-parser.add_argument('--do_eval', type=bool, default=False, help='Do eval or not.')
-parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
-parser.add_argument('--device_target', type=str, default='Ascend', help='Device target')
-parser.add_argument('--pre_trained', type=str, default=None, help='Pretrained checkpoint path')
-args_opt = parser.parse_args()
-
-if __name__ == '__main__':
-    target = args_opt.device_target
-    ckpt_save_dir = config.save_checkpoint_path
-    context.set_context(mode=context.GRAPH_MODE, device_target=target, save_graphs=False)
-    np.random.seed(1)
-    if not args_opt.do_eval and args_opt.run_distribute:
-        if target == "Ascend":
-            device_id = int(os.getenv('DEVICE_ID'))
-            context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False, device_id=device_id,
-                                enable_auto_mixed_precision=True)
-            init()
-            context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                              mirror_mean=True)
-            auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160])
-            ckpt_save_dir = config.save_checkpoint_path
-        elif target == "GPU":
-            context.set_context(mode=context.GRAPH_MODE, device_target="GPU", save_graphs=False)
-            init("nccl")
-            context.set_auto_parallel_context(device_num=get_group_size(), parallel_mode=ParallelMode.DATA_PARALLEL,
-                                              mirror_mean=True)
-            ckpt_save_dir = config.save_checkpoint_path + "ckpt_" + str(get_rank()) + "/"
-
-    epoch_size = config.epoch_size
-    net = resnet50(class_num=config.class_num)
-
-    # weight init
-    if args_opt.pre_trained:
-        param_dict = load_checkpoint(args_opt.pre_trained)
-        load_param_into_net(net, param_dict)
-        epoch_size = config.epoch_size - config.pretrained_epoch_size
-    else:
-        for _, cell in net.cells_and_names():
-            if isinstance(cell, nn.Conv2d):
-                cell.weight.default_input = weight_init.initializer(weight_init.XavierUniform(),
-                                                                    cell.weight.default_input.shape,
-                                                                    cell.weight.default_input.dtype).to_tensor()
-            if isinstance(cell, nn.Dense):
-                cell.weight.default_input = weight_init.initializer(weight_init.TruncatedNormal(),
-                                                                    cell.weight.default_input.shape,
-                                                                    cell.weight.default_input.dtype).to_tensor()
-    if not config.use_label_smooth:
-        config.label_smooth_factor = 0.0
-
-    loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
-
-    if args_opt.do_train:
-        dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True,
-                                 repeat_num=epoch_size, batch_size=config.batch_size, target=target)
-        step_size = dataset.get_dataset_size()
-
-        loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
-        lr = get_lr(lr_init=config.lr_init, lr_end=0.0, lr_max=config.lr_max, warmup_epochs=config.warmup_epochs,
-                    total_epochs=config.epoch_size, steps_per_epoch=step_size, lr_decay_mode='cosine')
-        if args_opt.pre_trained:
-            lr = lr[config.pretrained_epoch_size * step_size:]
-        lr = Tensor(lr)
-
-        opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
-                       config.weight_decay, config.loss_scale)
-        if target == "Ascend":
-            model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'},
-                          amp_level="O2", keep_batchnorm_fp32=False)
-        elif target == "GPU":
-            model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'})
-
-
-        time_cb = TimeMonitor(data_size=step_size)
-        loss_cb = LossMonitor()
-        cb = [time_cb, loss_cb]
-        if config.save_checkpoint:
-            config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs*step_size,
-                                         keep_checkpoint_max=config.keep_checkpoint_max)
-            ckpt_cb = ModelCheckpoint(prefix="resnet", directory=ckpt_save_dir, config=config_ck)
-            cb += [ckpt_cb]
-        model.train(epoch_size, dataset, callbacks=cb)
--- a/mindspore/_checkparam.py
+++ b/mindspore/_checkparam.py
@ -593,6 +593,17 @@ def check_bool(input_param):
    raise TypeError("Input type must be bool!")


+def check_string(input_param, valid_values):
+    """String type judgment."""
+    if isinstance(input_param, str) and input_param in valid_values:
+        return input_param
+    if len(valid_values) == 1:
+        raise ValueError(f'Input should be str and must be {valid_values[0]},'
+                         f' but got {input_param}.')
+    raise ValueError(f'Input should be str and must be one of {valid_values},'
+                     f' but got {input_param}.')
+
+
 def check_input_format(input_param):
    """Judge input format."""
    if input_param == "NCHW":
--- a/mindspore/_extends/parse/parser.py
+++ b/mindspore/_extends/parse/parser.py
@ -19,6 +19,7 @@
 import ast
 import types
 import inspect
+import hashlib
 from textwrap import dedent
 from dataclasses import is_dataclass
 import asttokens
@ -319,7 +320,6 @@ def get_dataclass_methods(cls):
               if isinstance(getattr(cls, name), (types.FunctionType,))}
    return methods

-
 class Parser:
    """
    Parser python code to ast tree.
@ -327,7 +327,10 @@ class Parser:
    Args:
        fn(FunctionType/MethodType): Need parse object instance.
        parse_method(ExtendInfoOfParseObj): Extend information for parse the function.
+        ast_cache: Dictionary for caching ast tree.
    """
+    ast_cache = {}
+
    def __init__(self, fn: (types.FunctionType, types.MethodType), parse_method=None) -> None:
        self.fn = fn
        self.parse_method = parse_method
@ -348,11 +351,15 @@ class Parser:
        tree = None
        if isinstance(self.fn, (types.FunctionType, types.MethodType)):
            original_src = inspect.getsource(self.fn)
-            src = dedent(original_src)
-            self.col_offset = \
-                len(original_src.split('\n')[0]) - len(src.split('\n')[0])
-            logger.debug("get source = %s", src)
-            tree = asttokens.ASTTokens(src, parse=True).tree
+            hexstr = hashlib.sha256(original_src.encode()).hexdigest()
+            tree = Parser.ast_cache.get(hexstr)
+            if not tree:
+                src = dedent(original_src)
+                self.col_offset = \
+                    len(original_src.split('\n')[0]) - len(src.split('\n')[0])
+                logger.debug("get source = %s", src)
+                tree = asttokens.ASTTokens(src, parse=True).tree
+                Parser.ast_cache[hexstr] = tree
        else:
            logger.error("Fn type is invalid")
        return tree
--- a/mindspore/_extends/parse/resources.py
+++ b/mindspore/_extends/parse/resources.py
@ -17,6 +17,7 @@
 """Resources for ast tree parse."""
 import ast
 import math
+from mindspore import IndexedSlices
 from mindspore.ops.composite import multitype_ops
 from mindspore.ops import functional as F, composite as C
 from . import standard_method as M
@ -111,10 +112,11 @@ convert_object_map = {
    # system function
    T.len:          M.ms_len,
    T.bool:         M.bool_,
-    T.map:          C.HyperMap(),
+    T.map:          C.Map(),
    T.partial:      F.partial,
    T.zip:          C.zip_operation,
    T.print:        F.print_,
+    T.enumerate:    M.enumerate_,

    # custom define operation
    T.iter:         M.ms_iter,
@ -135,4 +137,7 @@ convert_object_map = {
    math.sin:       NO_IMPLEMENT,
    math.cos:       NO_IMPLEMENT,
    math.tan:       NO_IMPLEMENT,
+
+    # user defined
+    IndexedSlices:  F.make_indexed_slices,
 }
--- a/mindspore/_extends/parse/standard_method.py
+++ b/mindspore/_extends/parse/standard_method.py
@ -104,6 +104,15 @@ def bool_(x):
    return x.__bool__()


+def enumerate_(x, start=0):
+    """Enumerate list or tuple."""
+    x_type = F.typeof(x)
+    ret = ()
+    if check_is_tuple_or_list(x_type, "enumerate"):
+        ret = zip(range(start, start + len(x)), x)
+    return ret
+
+
 def while_cond(x):
    """For while condtion, if the condition is a tensor, the loop will not be unrolled"""
    if F.issubclass_(F.typeof(x), F.typeof(mstype.tensor)):
@ -113,6 +122,13 @@ def while_cond(x):
    return x


+@constexpr
+def check_is_tuple_or_list(x, op_name):
+    """check whether x is list or tuple."""
+    if isinstance(x, (mstype.list_type, mstype.tuple_type)):
+        return True
+    raise TypeError(f"For '{op_name}', the input parameter should be tuple or list, but got {x}.")
+
@constexpr
 def check_is_tensor_bool_cond(shp):
    """check if tensor is a bool condition"""
--- a/mindspore/_extends/parse/trope.py
+++ b/mindspore/_extends/parse/trope.py
@ -27,7 +27,7 @@ from operator import (  # noqa

 # support system function call
 from builtins import (  # noqa
-    bool, getattr, setattr, len, iter, next, pow, range, map, zip, print
+    bool, getattr, setattr, len, iter, next, pow, range, map, zip, print, enumerate
 )

 # support functools
@ -44,7 +44,7 @@ __all__ = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'mod', 'eq', 'ne', 'lt',
           'not_', 'and_', 'or_', 'xor', 'lshift', 'rshift', 'invert', 'is_', 'is_not', 'contains',
           'matmul', 'getitem', 'setitem',
           'bool', 'getattr', 'setattr', 'len', 'iter', 'next', 'pow', 'range', 'map', 'zip',
-           'partial', 'print',
+           'partial', 'print', 'enumerate',
           'exp', 'log', 'sin', 'cos', 'tan']


--- a/mindspore/ccsrc/CMakeLists.txt
+++ b/mindspore/ccsrc/CMakeLists.txt
@ -71,6 +71,17 @@ message("onnx proto path is :" ${ONNX_PROTO})
 ms_protobuf_generate(ONNX_PROTO_SRCS ONNX_PROTO_HDRS ${ONNX_PROTO})
 list(APPEND MINDSPORE_PROTO_LIST ${ONNX_PROTO_SRCS})

+if (ENABLE_DEBUGGER)
+    # debugger: compile proto files
+    include_directories("${CMAKE_BINARY_DIR}/debug/debugger")
+    file(GLOB_RECURSE DEBUGGER_PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "debug/debugger/debug_graph.proto")
+    ms_protobuf_generate(DEBUGGER_PROTO_SRCS DEBUGGER_PROTO_HDRS ${DEBUGGER_PROTO_LIST})
+    file(GLOB_RECURSE DEBUGGER_GRPC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "debug/debugger/debug_grpc.proto")
+    ms_grpc_generate(DEBUGGER_GRPC_SRCS DEBUGGER_GRPC_HDRS ${DEBUGGER_GRPC_LIST})
+    list(APPEND MINDSPORE_PROTO_LIST ${DEBUGGER_PROTO_SRCS})
+    list(APPEND MINDSPORE_PROTO_LIST ${DEBUGGER_GRPC_SRCS})
+endif ()
+
 if (ENABLE_DUMP_PROTO)
    include_directories(${CMAKE_BINARY_DIR})
    file(GLOB_RECURSE PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "utils/node_strategy.proto")
@ -125,12 +136,21 @@ endforeach ()

 set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME)
 add_library(mindspore STATIC ${SUB_OBJECTS_SRC})
+
+target_link_libraries(proto_input mindspore::protobuf)
+
+if (ENABLE_DEBUGGER)
+    # debugger: link grpc
+    target_link_libraries(proto_input mindspore::grpc++)
+endif()
+
 target_link_libraries(mindspore proto_input)
-if (ENABLE_CPU AND ENABLE_MPI)
-    target_link_libraries(mindspore securec mindspore::flatbuffers mindspore::ompi)
+if (ENABLE_MPI)
+    target_link_libraries(mindspore securec mindspore::flatbuffers mpi_adapter)
 else ()
    target_link_libraries(mindspore securec mindspore::flatbuffers)
 endif ()
+
 if (NOT WIN32)
  target_link_libraries(mindspore dl)
 endif()
@ -210,6 +230,10 @@ else ()
    target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore -Wl,--no-whole-archive)
    target_link_libraries(_c_expression PRIVATE mindspore::pybind11_module)
    target_link_libraries(_c_expression PRIVATE mindspore_gvar)
+    target_link_libraries(_c_expression PRIVATE mindspore::pslite mindspore::protobuf ${zeromq_DIRPATH}/zmq_install/lib/libzmq.a)
+    if (${ENABLE_IBVERBS} STREQUAL "ON")
+        target_link_libraries(_c_expression PRIVATE ibverbs rdmacm)
+    endif()
 endif ()

 if (USE_GLOG)
@ -217,6 +241,7 @@ if (USE_GLOG)
 endif ()

 if (ENABLE_DUMP_PROTO)
+    message("add protobuf lib to c_expression")
    target_link_libraries(_c_expression PRIVATE mindspore::protobuf)
 endif ()

@ -256,10 +281,11 @@ endif ()

 if (USE_GLOG)
    target_link_libraries(inference PRIVATE mindspore::glog)
-else()
-    if (CMAKE_SYSTEM_NAME MATCHES "Linux")
-        target_link_options(inference PRIVATE -Wl,-init,mindspore_log_init)
-    elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
-        set_target_properties(inference PROPERTIES MACOSX_RPATH ON)
-    endif ()
 endif()
+
+if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+    target_link_options(inference PRIVATE -Wl,-init,common_log_init)
+elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
+    set_target_properties(inference PROPERTIES MACOSX_RPATH ON)
+endif ()
+
--- a/mindspore/ccsrc/dataset/api/de_pipeline.cc
+++ b/mindspore/ccsrc/dataset/api/de_pipeline.cc
@ -15,6 +15,7 @@
 */
 #include "dataset/api/de_pipeline.h"

+#include <algorithm>
 #include <set>
 #include <map>

@ -45,7 +46,7 @@

 namespace mindspore {
 namespace dataset {
-using pFunction = Status (DEPipeline::*)(const py::dict &, std::shared_ptr<DatasetOp> *);
+using pFunction = Status (DEPipeline::*)(const py::dict &, std::shared_ptr<DatasetOp> *, std::shared_ptr<DatasetOp> *);

 static std::unordered_map<uint32_t, pFunction> g_parse_op_func_ = {
  {kShuffle, &DEPipeline::ParseShuffleOp},
@ -107,18 +108,44 @@ DEPipeline::~DEPipeline() {
 }

 // Function to add a Node to the Execution Tree.
-Status DEPipeline::AddNodeToTree(const OpName &op_name, const py::dict &args, DsOpPtr *out) {
-  // For each operator, Parse through the list of arguments,
-  // then call the respective builder/constructor.
+Status DEPipeline::AddNodeToTree(const OpName &op_name, const py::dict &args, py::dict *output) {
+  // For each operator, Parse through the list of arguments, then call the respective builder/constructor.
+  // Note that each call to the parse function may result in building more than one dataset operator.
+  // For example, one call to ParseNNNOp may result in multiple internal C nodes:
+  // nodeA
+  //   |
+  // nodeB
+  //   |
+  // nodeC
+  // However, the python side dataset is more abstract, and it does not know about the potential subtree that
+  // is being built here. Since the python api is hooking tree nodes together (parent/child hookups), the
+  // python side needs to know about nodeA and nodeC to be able to appropriately hook up parents and children
+  // to this subtree.
+  // Thus, it is required that both the top-most parent and bottom-most child are returned from the parse
+  // function.
+  DsOpPtr top = nullptr;
+  DsOpPtr bottom = nullptr;
  auto iter = g_parse_op_func_.find(op_name);
  if (iter != g_parse_op_func_.end()) {
    pFunction func = iter->second;
-    RETURN_IF_NOT_OK((this->*func)(args, out));
+    RETURN_IF_NOT_OK((this->*func)(args, &top, &bottom));
+
+    if (top == nullptr) {
+      RETURN_STATUS_UNEXPECTED("An operator was parsed but it did not produce a C node.");
+    }
+
+    // It is not required that the parse function always produces the bottom pointer. If it's still null,
+    // then set top and bottom to be the same operator.
+    if (bottom == nullptr) bottom = top;
+
+    // Pack these pointers into a py dict so that we can return both back to python.
+    (*output)["top"] = top;
+    (*output)["bottom"] = bottom;
  } else {
    RETURN_STATUS_UNEXPECTED("No such Op");
  }
  // Associate current dataset op node with the tree.
-  RETURN_IF_NOT_OK(tree_->AssociateNode(*out));
+  RETURN_IF_NOT_OK(tree_->AssociateNode(top));
  return Status::OK();
 }
 // Function to add a child and parent relationship.
@ -300,7 +327,8 @@ Status DEPipeline::SetBatchParameters(const py::dict &args) {
  return Status::OK();
 }

-Status DEPipeline::ParseShuffleOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseShuffleOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                  std::shared_ptr<DatasetOp> *bottom) {
  std::shared_ptr<ShuffleOp::Builder> builder = std::make_shared<ShuffleOp::Builder>();
  if (!args["buffer_size"].is_none()) {
    (void)builder->SetShuffleSize(ToInt(args["buffer_size"]));
@ -322,7 +350,7 @@ Status DEPipeline::ParseShuffleOp(const py::dict &args, std::shared_ptr<DatasetO

  std::shared_ptr<ShuffleOp> op;
  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

@ -350,7 +378,8 @@ Status DEPipeline::BuildMindrecordSamplerChain(const py::handle &handle,
  return Status::OK();
 }

-Status DEPipeline::ParseMindRecordOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseMindRecordOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                     std::shared_ptr<DatasetOp> *bottom) {
  if (args["dataset_file"].is_none()) {
    std::string err_msg = "Error: at least one of dataset_files is missing";
    RETURN_STATUS_UNEXPECTED(err_msg);
@ -403,13 +432,15 @@ Status DEPipeline::ParseMindRecordOp(const py::dict &args, std::shared_ptr<Datas
  std::shared_ptr<MindRecordOp> op;
  RETURN_IF_NOT_OK(builder->Build(&op));
  num_rows_ = op->num_rows();
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

-Status DEPipeline::ParseMapOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
-  std::shared_ptr<MapOp::Builder> builder = std::make_shared<MapOp::Builder>();
+Status DEPipeline::ParseMapOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                              std::shared_ptr<DatasetOp> *bottom) {
+  MapOp::Builder map_builder;
  std::vector<std::shared_ptr<TensorOp>> tensor_op_list;
+  std::vector<std::string> project_columns;

  if (args["operations"].is_none()) RETURN_STATUS_UNEXPECTED("Error: 'operations' is not set. \n");

@ -419,15 +450,15 @@ Status DEPipeline::ParseMapOp(const py::dict &args, std::shared_ptr<DatasetOp> *
    if (!value.is_none()) {
      if (key == "input_columns") {
        std::vector<std::string> in_col_names = ToStringVector(args["input_columns"]);
-        (void)builder->SetInColNames(in_col_names);
+        (void)map_builder.SetInColNames(in_col_names);
      } else if (key == "output_columns") {
-        (void)builder->SetOutColNames(ToStringVector(value));
+        (void)map_builder.SetOutColNames(ToStringVector(value));
      } else if (key == "columns_order") {
-        (void)builder->SetColOrder(ToStringVector(value));
+        project_columns = ToStringVector(value);
      } else if (key == "num_parallel_workers") {
-        (void)builder->SetNumWorkers(ToInt(value));
+        (void)map_builder.SetNumWorkers(ToInt(value));
      } else if (key == "prefetch_size") {
-        (void)builder->SetOpConnectorSize(ToInt(value));
+        (void)map_builder.SetOpConnectorSize(ToInt(value));
      } else if (key == "operations") {
        py::handle tensor_ops = args["operations"];
        // operation can be a list of TensorOps or a single TensorOp.
@ -445,20 +476,34 @@ Status DEPipeline::ParseMapOp(const py::dict &args, std::shared_ptr<DatasetOp> *
          }
        }
        if (tensor_op_list.empty()) RETURN_STATUS_UNEXPECTED("Error: tensor_op is invalid or not set.");
-        (void)builder->SetTensorFuncs(std::move(tensor_op_list));
+        (void)map_builder.SetTensorFuncs(std::move(tensor_op_list));
      } else {
        RETURN_STATUS_UNEXPECTED("Error: Unhandled key: " + key);
      }
    }
  }

-  std::shared_ptr<MapOp> op;
-  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+  std::shared_ptr<MapOp> map_op;
+  RETURN_IF_NOT_OK(map_builder.Build(&map_op));
+  RETURN_IF_NOT_OK(tree_->AssociateNode(map_op));
+  *top = map_op;
+
+  // Add a project op over top of the map if the user wanted to reposition the columns
+  if (!project_columns.empty()) {
+    ProjectOp::Builder proj_builder(project_columns);
+    std::shared_ptr<ProjectOp> proj_op;
+    RETURN_IF_NOT_OK(proj_builder.Build(&proj_op));
+    RETURN_IF_NOT_OK(tree_->AssociateNode(proj_op));
+    RETURN_IF_NOT_OK(proj_op->AddChild(map_op));
+    *top = proj_op;
+    *bottom = map_op;
+  }
+
  return Status::OK();
 }

-Status DEPipeline::ParseFilterOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseFilterOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                 std::shared_ptr<DatasetOp> *bottom) {
  std::shared_ptr<FilterOp::Builder> builder = std::make_shared<FilterOp::Builder>();

  if (args["predicate"].is_none()) {
@ -489,11 +534,12 @@ Status DEPipeline::ParseFilterOp(const py::dict &args, std::shared_ptr<DatasetOp

  std::shared_ptr<FilterOp> op;
  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

-Status DEPipeline::ParseRepeatOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseRepeatOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                 std::shared_ptr<DatasetOp> *bottom) {
  if (args["count"].is_none()) {
    std::string err_msg = "Error: count is invalid or not set.";
    RETURN_STATUS_UNEXPECTED(err_msg);
@ -501,22 +547,24 @@ Status DEPipeline::ParseRepeatOp(const py::dict &args, std::shared_ptr<DatasetOp
  repeat_num_ = ToInt(args["count"]);
  std::shared_ptr<RepeatOp> op;
  RETURN_IF_NOT_OK(RepeatOp::Builder(ToInt(args["count"])).Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

-Status DEPipeline::ParseSkipOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseSkipOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                               std::shared_ptr<DatasetOp> *bottom) {
  if (args["count"].is_none()) {
    std::string err_msg = "Error: count is invalid or not set.";
    RETURN_STATUS_UNEXPECTED(err_msg);
  }
  std::shared_ptr<SkipOp> op;
  RETURN_IF_NOT_OK(SkipOp::Builder(ToInt(args["count"])).Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

-Status DEPipeline::ParseGeneratorOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseGeneratorOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                    std::shared_ptr<DatasetOp> *bottom) {
  std::shared_ptr<GeneratorOp::Builder> builder = std::make_shared<GeneratorOp::Builder>();
  for (auto arg : args) {
    std::string key = py::str(arg.first);
@ -538,11 +586,12 @@ Status DEPipeline::ParseGeneratorOp(const py::dict &args, std::shared_ptr<Datase
  }
  std::shared_ptr<GeneratorOp> op;
  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

-Status DEPipeline::ParseBatchOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseBatchOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                std::shared_ptr<DatasetOp> *bottom) {
  std::shared_ptr<BatchOp::Builder> builder;
  if (py::isinstance<py::int_>(args["batch_size"])) {
    batch_size_ = ToInt(args["batch_size"]);
@ -582,11 +631,12 @@ Status DEPipeline::ParseBatchOp(const py::dict &args, std::shared_ptr<DatasetOp>

  std::shared_ptr<BatchOp> op;
  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

-Status DEPipeline::ParseBucketBatchByLengthOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseBucketBatchByLengthOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                              std::shared_ptr<DatasetOp> *bottom) {
  std::vector<std::string> mandatory_arguments = {"length_dependent_columns", "bucket_boundaries",
                                                  "bucket_batch_sizes"};
  for (auto name : mandatory_arguments) {
@ -632,11 +682,12 @@ Status DEPipeline::ParseBucketBatchByLengthOp(const py::dict &args, std::shared_

  std::shared_ptr<BucketBatchByLengthOp> op;
  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

-Status DEPipeline::ParseBarrierOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseBarrierOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                  std::shared_ptr<DatasetOp> *bottom) {
  std::shared_ptr<BarrierOp::Builder> builder = std::make_shared<BarrierOp::Builder>();
  // Right now barrier should only take num_rows_per_buffer = 1
  // The reason for this is because having it otherwise can lead to blocking issues
@ -656,11 +707,12 @@ Status DEPipeline::ParseBarrierOp(const py::dict &args, std::shared_ptr<DatasetO

  std::shared_ptr<BarrierOp> op;
  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

-Status DEPipeline::ParseDeviceQueueOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseDeviceQueueOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                      std::shared_ptr<DatasetOp> *bottom) {
  int32_t prefetch_size = 0;
  if (args.contains("prefetch_size")) {
    if (args["prefetch_size"].is_none()) {
@ -687,11 +739,12 @@ Status DEPipeline::ParseDeviceQueueOp(const py::dict &args, std::shared_ptr<Data
  }
  std::shared_ptr<DeviceQueueOp> op;
  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

-Status DEPipeline::ParseRenameOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseRenameOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                 std::shared_ptr<DatasetOp> *bottom) {
  std::vector<std::string> in_col_names;
  std::vector<std::string> out_col_names;
  std::shared_ptr<RenameOp::Builder> builder = std::make_shared<RenameOp::Builder>();
@ -718,48 +771,57 @@ Status DEPipeline::ParseRenameOp(const py::dict &args, std::shared_ptr<DatasetOp
  (void)builder->SetOutColNames(out_col_names);
  std::shared_ptr<RenameOp> op;
  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

-Status DEPipeline::ParseTakeOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseTakeOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                               std::shared_ptr<DatasetOp> *bottom) {
  // Builds a TakeOp from the "count" entry of args and hands it back through the first output pointer.
  // The second output pointer of the new signature (bottom) is never written by this function.
  // A "count" of None is rejected with an error status before any conversion is attempted.
  if (args["count"].is_none()) {
    std::string err_msg = "Error: count is invalid or not set.";
    RETURN_STATUS_UNEXPECTED(err_msg);
  }
  std::shared_ptr<TakeOp> op;
  // "count" is passed through ToInt and given to the builder's constructor.
  RETURN_IF_NOT_OK(TakeOp::Builder(ToInt(args["count"])).Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

-Status DEPipeline::ParseZipOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseZipOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                              std::shared_ptr<DatasetOp> *bottom) {
  // Builds a ZipOp from a default-constructed builder; nothing is read from args here.
  // The result is returned through the first output pointer; bottom is never written by this function.
  std::shared_ptr<ZipOp::Builder> builder = std::make_shared<ZipOp::Builder>();
  std::shared_ptr<ZipOp> op;
  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

-Status DEPipeline::ParseConcatOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseConcatOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                 std::shared_ptr<DatasetOp> *bottom) {
  // Builds a ConcatOp from a default-constructed builder; nothing is read from args here.
  // The result is returned through the first output pointer; bottom is never written by this function.
  std::shared_ptr<ConcatOp::Builder> builder = std::make_shared<ConcatOp::Builder>();
  std::shared_ptr<ConcatOp> op;
  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

-Status DEPipeline::ParseTFReaderOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseTFReaderOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                   std::shared_ptr<DatasetOp> *bottom) {
  // Required arguments
+  std::vector<std::string> files_list;
  std::shared_ptr<TFReaderOp::Builder> builder = std::make_shared<TFReaderOp::Builder>();
  if (!args["dataset_files"].is_none()) {
-    (void)builder->SetDatasetFilesList(ToStringVector(args["dataset_files"]));
+    files_list = ToStringVector(args["dataset_files"]);
+    (void)builder->SetDatasetFilesList(files_list);
  } else {
    std::string err_msg = "Error: at least one of dataset_files or schema_file is missing";
    RETURN_STATUS_UNEXPECTED(err_msg);
  }
  std::vector<std::string> columns_to_load;
  bool schema_exists = false;
+  bool shuffle_required = false;
+  int64_t num_devices = 0;
+  int64_t total_rows = 0;
  // Optional arguments
  for (auto arg : args) {
    std::string key = py::str(arg.first);
@ -773,13 +835,15 @@ Status DEPipeline::ParseTFReaderOp(const py::dict &args, std::shared_ptr<Dataset
      } else if (key == "shuffle_files") {
        (void)builder->SetShuffleFiles(ToBool(value));
      } else if (key == "shuffle_global") {
-        (void)builder->SetShuffleGlobal(ToBool(value));
+        shuffle_required = ToBool(value);
      } else if (key == "schema_file_path" || key == "schema_json_string") {
        schema_exists = true;
      } else if (key == "num_samples") {
-        (void)builder->setTotalRows(ToInt(value));
+        total_rows = ToInt(value);
+        (void)builder->setTotalRows(total_rows);
      } else if (key == "num_shards") {
-        (void)builder->SetNumDevices(ToInt(value));
+        num_devices = ToInt(value);
+        (void)builder->SetNumDevices(num_devices);
      } else if (key == "shard_id") {
        (void)builder->SetDeviceId(ToInt(value));
      } else if (key == "shard_equal_rows") {
@ -796,13 +860,33 @@ Status DEPipeline::ParseTFReaderOp(const py::dict &args, std::shared_ptr<Dataset
    }
    (void)builder->SetDataSchema(std::move(schema));
  }
-  std::shared_ptr<TFReaderOp> op;
-  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+  std::shared_ptr<TFReaderOp> tf_op;
+  RETURN_IF_NOT_OK(builder->Build(&tf_op));
+  RETURN_IF_NOT_OK(tree_->AssociateNode(tf_op));
+  *top = tf_op;
+
+  if (shuffle_required) {
+    const boolean estimate = true;
+    const int64_t workers = 8;
+    std::shared_ptr<DatasetOp> shuffle_op = nullptr;
+    int64_t shuffle_size = 0;
+    int64_t num_rows = 0;
+
+    // First, get the number of rows in the dataset via estimate and then compute the shuffle size
+    RETURN_IF_NOT_OK(TFReaderOp::CountTotalRows(&num_rows, files_list, workers, estimate));
+    RETURN_IF_NOT_OK(ComputeShuffleSize(files_list.size(), num_devices, num_rows, total_rows, &shuffle_size));
+
+    // Add the shuffle op over top of this op and return the subtree (top/bottom) to caller
+    RETURN_IF_NOT_OK(AddShuffleOp(shuffle_size, tf_op, &shuffle_op));
+    *top = shuffle_op;
+    *bottom = tf_op;
+  }
+
  return Status::OK();
 }

-Status DEPipeline::ParseProjectOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseProjectOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                  std::shared_ptr<DatasetOp> *bottom) {
  if (args["columns"].is_none()) {
    std::string err_msg = "Error: columns is missing";
    RETURN_STATUS_UNEXPECTED(err_msg);
@ -811,11 +895,12 @@ Status DEPipeline::ParseProjectOp(const py::dict &args, std::shared_ptr<DatasetO
  std::shared_ptr<ProjectOp::Builder> builder = std::make_shared<ProjectOp::Builder>(columns_to_project);
  std::shared_ptr<ProjectOp> op;
  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

-Status DEPipeline::ParseImageFolderOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseImageFolderOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                      std::shared_ptr<DatasetOp> *bottom) {
  // Required arguments
  if (args["dataset_dir"].is_none()) {
    std::string err_msg = "Error: No dataset path specified";
@ -846,11 +931,12 @@ Status DEPipeline::ParseImageFolderOp(const py::dict &args, std::shared_ptr<Data
  }
  std::shared_ptr<ImageFolderOp> op;
  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

-Status DEPipeline::ParseManifestOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseManifestOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                   std::shared_ptr<DatasetOp> *bottom) {
  // Required arguments
  if (args["dataset_file"].is_none()) {
    std::string err_msg = "Error: No dataset files specified for manifest";
@ -881,11 +967,12 @@ Status DEPipeline::ParseManifestOp(const py::dict &args, std::shared_ptr<Dataset
  }
  std::shared_ptr<ManifestOp> op;
  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

-Status DEPipeline::ParseVOCOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseVOCOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                              std::shared_ptr<DatasetOp> *bottom) {
  if (args["dataset_dir"].is_none()) {
    std::string err_msg = "Error: No dataset path specified";
    RETURN_STATUS_UNEXPECTED(err_msg);
@ -924,11 +1011,13 @@ Status DEPipeline::ParseVOCOp(const py::dict &args, std::shared_ptr<DatasetOp> *
  }
  std::shared_ptr<VOCOp> op;
  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+  *top = op;
+
  return Status::OK();
 }

-Status DEPipeline::ParseCocoOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseCocoOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                               std::shared_ptr<DatasetOp> *bottom) {
  if (args["dataset_dir"].is_none()) {
    std::string err_msg = "Error: No dataset path specified";
    RETURN_STATUS_UNEXPECTED(err_msg);
@ -965,11 +1054,12 @@ Status DEPipeline::ParseCocoOp(const py::dict &args, std::shared_ptr<DatasetOp>
  }
  std::shared_ptr<CocoOp> op;
  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

-Status DEPipeline::ParseCifar10Op(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseCifar10Op(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                  std::shared_ptr<DatasetOp> *bottom) {
  // Required arguments
  if (args["dataset_dir"].is_none()) {
    std::string err_msg = "Error: No dataset path specified";
@ -998,11 +1088,12 @@ Status DEPipeline::ParseCifar10Op(const py::dict &args, std::shared_ptr<DatasetO

  std::shared_ptr<CifarOp> op;
  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

-Status DEPipeline::ParseCifar100Op(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseCifar100Op(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                   std::shared_ptr<DatasetOp> *bottom) {
  // Required arguments
  if (args["dataset_dir"].is_none()) {
    std::string err_msg = "Error: No dataset path specified";
@ -1031,11 +1122,12 @@ Status DEPipeline::ParseCifar100Op(const py::dict &args, std::shared_ptr<Dataset

  std::shared_ptr<CifarOp> op;
  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

-Status DEPipeline::ParseRandomDataOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseRandomDataOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                     std::shared_ptr<DatasetOp> *bottom) {
  // Required arguments
  RandomDataOp::Builder builder;

@ -1072,13 +1164,14 @@ Status DEPipeline::ParseRandomDataOp(const py::dict &args, std::shared_ptr<Datas
  }
  std::shared_ptr<RandomDataOp> op;
  RETURN_IF_NOT_OK(builder.Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

 int32_t DEPipeline::GetNumClasses() const { return num_classes_; }

-Status DEPipeline::ParseMnistOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseMnistOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                std::shared_ptr<DatasetOp> *bottom) {
  // Required arguments
  if (args["dataset_dir"].is_none()) {
    std::string err_msg = "Error: No dataset path specified";
@ -1104,11 +1197,12 @@ Status DEPipeline::ParseMnistOp(const py::dict &args, std::shared_ptr<DatasetOp>
  }
  std::shared_ptr<MnistOp> op;
  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

-Status DEPipeline::ParseCelebAOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseCelebAOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                 std::shared_ptr<DatasetOp> *bottom) {
  // Required arguments
  if (args["dataset_dir"].is_none()) {
    std::string err_msg = "Error: No dataset path specified";
@ -1143,19 +1237,24 @@ Status DEPipeline::ParseCelebAOp(const py::dict &args, std::shared_ptr<DatasetOp

  std::shared_ptr<CelebAOp> op;
  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

-Status DEPipeline::ParseTextFileOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseTextFileOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                   std::shared_ptr<DatasetOp> *bottom) {
  // Required arguments
+  std::vector<std::string> files_list;
  std::shared_ptr<TextFileOp::Builder> builder = std::make_shared<TextFileOp::Builder>();
  if (!args["dataset_files"].is_none()) {
-    (void)builder->SetTextFilesList(ToStringVector(args["dataset_files"]));
+    files_list = ToStringVector(args["dataset_files"]);
+    (void)builder->SetTextFilesList(files_list);
  } else {
    RETURN_STATUS_UNEXPECTED("Error: dataset_files is missing");
  }
  // Optional arguments
+  bool shuffle_required = false;
+  int64_t num_devices = 0;
  for (auto arg : args) {
    std::string key = py::str(arg.first);
    py::handle value = arg.second;
@ -1165,19 +1264,38 @@ Status DEPipeline::ParseTextFileOp(const py::dict &args, std::shared_ptr<Dataset
      } else if (key == "shuffle_files") {
        (void)builder->SetShuffleFiles(ToBool(value));
      } else if (key == "shuffle_global") {
-        (void)builder->SetShuffleGlobal(ToBool(value));
+        shuffle_required = ToBool(value);
      } else if (key == "num_samples") {
        (void)builder->SetTotalRows(ToInt(value));
      } else if (key == "num_shards") {
-        (void)builder->SetNumDevices(ToInt(value));
+        num_devices = ToInt(value);
+        (void)builder->SetNumDevices(num_devices);
      } else if (key == "shard_id") {
        (void)builder->SetDeviceId(ToInt(value));
      }
    }
  }
-  std::shared_ptr<TextFileOp> op;
-  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+
+  std::shared_ptr<TextFileOp> txt_op;
+  RETURN_IF_NOT_OK(builder->Build(&txt_op));
+  RETURN_IF_NOT_OK(tree_->AssociateNode(txt_op));
+  *top = txt_op;
+
+  if (shuffle_required) {
+    std::shared_ptr<DatasetOp> shuffle_op = nullptr;
+    int64_t shuffle_size = 0;
+    int64_t num_rows = 0;
+
+    // First, get the number of rows in the dataset and then compute the shuffle size
+    RETURN_IF_NOT_OK(TextFileOp::CountAllFileRows(files_list, &num_rows));
+    RETURN_IF_NOT_OK(ComputeShuffleSize(files_list.size(), num_devices, num_rows, 0, &shuffle_size));
+
+    // Add the shuffle op over top of this op and return the subtree (top/bottom) to caller
+    RETURN_IF_NOT_OK(AddShuffleOp(shuffle_size, txt_op, &shuffle_op));
+    *top = shuffle_op;
+    *bottom = txt_op;
+  }
+
  return Status::OK();
 }

@ -1208,7 +1326,8 @@ Status DEPipeline::ParsePadInfo(py::handle value, PadInfo *pad_info) {
  return Status::OK();
 }

-Status DEPipeline::ParseBuildVocabOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseBuildVocabOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                     std::shared_ptr<DatasetOp> *bottom) {
  std::shared_ptr<BuildVocabOp::Builder> builder = std::make_shared<BuildVocabOp::Builder>();
  for (auto arg : args) {
    std::string key = py::str(arg.first);
@ -1235,18 +1354,23 @@ Status DEPipeline::ParseBuildVocabOp(const py::dict &args, std::shared_ptr<Datas
  }
  std::shared_ptr<BuildVocabOp> op;
  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+  *top = op;
  return Status::OK();
 }

-Status DEPipeline::ParseClueOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+Status DEPipeline::ParseClueOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                               std::shared_ptr<DatasetOp> *bottom) {
+  std::vector<std::string> files_list;
  std::shared_ptr<ClueOp::Builder> builder = std::make_shared<ClueOp::Builder>();
  if (!args["dataset_files"].is_none()) {
-    (void)builder->SetClueFilesList(ToStringVector(args["dataset_files"]));
+    files_list = ToStringVector(args["dataset_files"]);
+    (void)builder->SetClueFilesList(files_list);
  } else {
    RETURN_STATUS_UNEXPECTED("Error: dataset_files is missing");
  }
  // Optional arguments
+  bool shuffle_required = false;
+  int64_t num_devices = 0;
  for (auto arg : args) {
    std::string key = py::str(arg.first);
    py::handle value = arg.second;
@ -1256,11 +1380,12 @@ Status DEPipeline::ParseClueOp(const py::dict &args, std::shared_ptr<DatasetOp>
      } else if (key == "shuffle_files") {
        (void)builder->SetShuffleFiles(ToBool(value));
      } else if (key == "shuffle_global") {
-        (void)builder->SetShuffleGlobal(ToBool(value));
+        shuffle_required = ToBool(value);
      } else if (key == "num_samples") {
        (void)builder->SetNumSamples(ToInt(value));
      } else if (key == "num_shards") {
-        (void)builder->SetNumDevices(ToInt(value));
+        num_devices = ToInt(value);
+        (void)builder->SetNumDevices(num_devices);
      } else if (key == "shard_id") {
        (void)builder->SetDeviceId(ToInt(value));
      } else if (key == "cols_to_keyword") {
@ -1276,9 +1401,76 @@ Status DEPipeline::ParseClueOp(const py::dict &args, std::shared_ptr<DatasetOp>
      }
    }
  }
-  std::shared_ptr<ClueOp> op;
-  RETURN_IF_NOT_OK(builder->Build(&op));
-  *ptr = op;
+
+  std::shared_ptr<ClueOp> clue_op;
+  RETURN_IF_NOT_OK(builder->Build(&clue_op));
+  RETURN_IF_NOT_OK(tree_->AssociateNode(clue_op));
+  *top = clue_op;
+
+  if (shuffle_required) {
+    std::shared_ptr<DatasetOp> shuffle_op = nullptr;
+    int64_t shuffle_size = 0;
+    int64_t num_rows = 0;
+
+    // First, get the number of rows in the dataset and then compute the shuffle size
+    RETURN_IF_NOT_OK(ClueOp::CountAllFileRows(files_list, &num_rows));
+    RETURN_IF_NOT_OK(ComputeShuffleSize(files_list.size(), num_devices, num_rows, 0, &shuffle_size));
+
+    // Add the shuffle op over top of this op and return the subtree (top/bottom) to caller
+    RETURN_IF_NOT_OK(AddShuffleOp(shuffle_size, clue_op, &shuffle_op));
+    *top = shuffle_op;
+    *bottom = clue_op;
+  }
+
+  return Status::OK();
+}
+
+// Wraps input_op in a newly built ShuffleOp (associated with tree_) and returns that parent through shuffle_op.
+Status DEPipeline::AddShuffleOp(int64_t shuffle_size, std::shared_ptr<DatasetOp> input_op,
+                                std::shared_ptr<DatasetOp> *shuffle_op) {
+  // Configure and build the shuffle operator with the requested shuffle size.
+  ShuffleOp::Builder builder;
+  (void)builder.SetShuffleSize(shuffle_size);
+  std::shared_ptr<ShuffleOp> parent;
+  RETURN_IF_NOT_OK(builder.Build(&parent));
+
+  // Associate the new operator with the tree, then attach input_op underneath it:
+  //
+  //   ShuffleOp (parent)
+  //       |
+  //   input_op
+  //
+  RETURN_IF_NOT_OK(tree_->AssociateNode(parent));
+  RETURN_IF_NOT_OK(parent->AddChild(input_op));
+
+  // The output is assigned last, after the steps above.
+  *shuffle_op = parent;
+  return Status::OK();
+}
+
+// Common code for computing a default shuffle size.
+//
+// num_files    Number of dataset files; must be greater than 0, otherwise an error status is returned.
+// num_devices  Number of shards; when greater than 0 the row count is divided per shard, rounding up.
+// num_rows     Number of rows counted in the dataset.
+// total_rows   Optional cap on the row count; values <= 0 mean "no cap".
+// shuffle_size Output: the computed shuffle size.
+Status DEPipeline::ComputeShuffleSize(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows,
+                                      int64_t *shuffle_size) {
+  const int64_t average_files_multiplier = 4;
+  const int64_t shuffle_max = 10000;
+  int64_t avg_rows_per_file = 0;
+
+  // An empty file list would make the per-file average below divide by zero, so reject it up front.
+  if (num_files <= 0) {
+    std::string err_msg = "Error: the number of dataset files must be greater than 0 to compute shuffle size.";
+    RETURN_STATUS_UNEXPECTED(err_msg);
+  }
+
+  // Adjust the num rows per shard if sharding was given
+  if (num_devices > 0) {
+    if (num_rows % num_devices == 0) {
+      num_rows = num_rows / num_devices;
+    } else {
+      num_rows = (num_rows / num_devices) + 1;
+    }
+  }
+
+  // Cap based on total rows directive.  Some ops do not have this and give value of 0.
+  if (total_rows > 0) {
+    num_rows = std::min(num_rows, total_rows);
+  }
+
+  // get the average per file
+  avg_rows_per_file = num_rows / num_files;
+
+  // NOTE(review): despite its name, shuffle_max acts as a lower bound here because std::max is used,
+  // so the result is never smaller than 10000 - confirm this is the intended behavior.
+  *shuffle_size = std::max(avg_rows_per_file * average_files_multiplier, shuffle_max);
  return Status::OK();
 }
 }  // namespace dataset
--- a/mindspore/ccsrc/dataset/api/de_pipeline.h
+++ b/mindspore/ccsrc/dataset/api/de_pipeline.h
@ -77,7 +77,7 @@ class DEPipeline {
  ~DEPipeline();

  // Function to add a Node to the Execution Tree.
-  Status AddNodeToTree(const OpName &op_name, const py::dict &args, DsOpPtr *out);
+  Status AddNodeToTree(const OpName &op_name, const py::dict &args, py::dict *output);

  // Function to add a child and parent relationship.
  static Status AddChildToParentNode(const DsOpPtr &child_op, const DsOpPtr &parent_op);
@ -104,73 +104,74 @@ class DEPipeline {

  int GetRepeatCount() const;

-  Status ParseShuffleOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseShuffleOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseMindRecordOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseMindRecordOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

  Status BuildMindrecordSamplerChain(const py::handle &handle,
                                     std::vector<std::shared_ptr<mindrecord::ShardOperator>> *operators,
                                     int num_padded);

-  Status ParseMapOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseMapOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseFilterOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseFilterOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseRepeatOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseRepeatOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseSkipOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseSkipOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseBatchOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseBatchOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseBucketBatchByLengthOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseBucketBatchByLengthOp(const py::dict &args, std::shared_ptr<DatasetOp> *top,
+                                    std::shared_ptr<DatasetOp> *bottom);

-  Status ParseBarrierOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseBarrierOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseGeneratorOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseGeneratorOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseRenameOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseRenameOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseTakeOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseTakeOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseZipOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseZipOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseConcatOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseConcatOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseDeviceQueueOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseDeviceQueueOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseTFReaderOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseTFReaderOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseProjectOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseProjectOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseImageFolderOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseImageFolderOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseManifestOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseManifestOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseVOCOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseVOCOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseCocoOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseCocoOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseCifar10Op(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseCifar10Op(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseCifar100Op(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseCifar100Op(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseRandomDataOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseRandomDataOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

  void PrintTree();

  int32_t GetNumClasses() const;

-  Status ParseMnistOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseMnistOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

  Status SetBatchParameters(const py::dict &args);

-  Status ParseCelebAOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseCelebAOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseTextFileOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseTextFileOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseBuildVocabOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseBuildVocabOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

-  Status ParseClueOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+  Status ParseClueOp(const py::dict &args, std::shared_ptr<DatasetOp> *top, std::shared_ptr<DatasetOp> *bottom);

 private:
  // Execution tree that links the dataset operators.
@ -180,6 +181,25 @@ class DEPipeline {

  static Status ParsePadInfo(py::handle value, PadInfo *pad_info);

+  /// \brief Helper function to inject a shuffle operator over top of the current operation being built.
+  /// \param[in] shuffle_size The size to use in the shuffle buffer
+  /// \param[in] input_op The operator to build shuffle on top of
+  /// \param[out] shuffle_op The top node of the created subtree (subtree contains two nodes). In this case it will be
+  ///     the shuffle operator
+  /// \return Status return code
+  Status AddShuffleOp(int64_t shuffle_size, std::shared_ptr<DatasetOp> input_op,
+                      std::shared_ptr<DatasetOp> *shuffle_op);
+
+  /// \brief Helper function to compute the shuffle size
+  /// \param[in] num_files The number of files in the dataset
+  /// \param[in] num_devices The number of devices in the dataset
+  /// \param[in] num_rows The number of rows in the dataset
+  /// \param[in] total_rows An upper bound on the total rows in the dataset
+  /// \param[out] shuffle_size The resultant computed shuffle size
+  /// \return Status return code
+  Status ComputeShuffleSize(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows,
+                            int64_t *shuffle_size);
+
  int batch_size_;
  int repeat_num_;
  int num_rows_;
--- a/mindspore/ccsrc/dataset/api/python_bindings.cc
+++ b/mindspore/ccsrc/dataset/api/python_bindings.cc
@ -63,12 +63,14 @@
 #include "dataset/kernels/image/random_horizontal_flip_bbox_op.h"
 #include "dataset/kernels/image/random_horizontal_flip_op.h"
 #include "dataset/kernels/image/random_resize_op.h"
+#include "dataset/kernels/image/random_resize_with_bbox_op.h"
 #include "dataset/kernels/image/random_rotation_op.h"
 #include "dataset/kernels/image/random_vertical_flip_op.h"
 #include "dataset/kernels/image/random_vertical_flip_with_bbox_op.h"
 #include "dataset/kernels/image/rescale_op.h"
 #include "dataset/kernels/image/resize_bilinear_op.h"
 #include "dataset/kernels/image/resize_op.h"
+#include "dataset/kernels/image/resize_with_bbox_op.h"
 #include "dataset/kernels/image/uniform_aug_op.h"
 #include "dataset/kernels/no_op.h"
 #include "dataset/text/kernels/jieba_tokenizer_op.h"
@ -116,9 +118,9 @@ void bindDEPipeline(py::module *m) {
    .def(
      "AddNodeToTree",
      [](DEPipeline &de, const OpName &op_name, const py::dict &args) {
-        DsOpPtr op;
-        THROW_IF_ERROR(de.AddNodeToTree(op_name, args, &op));
-        return op;
+        py::dict out;
+        THROW_IF_ERROR(de.AddNodeToTree(op_name, args, &out));
+        return out;
      },
      py::return_value_policy::reference)
    .def_static("AddChildToParentNode",
@ -348,6 +350,18 @@ void bindTensorOps1(py::module *m) {
    .def(py::init<int32_t, int32_t, InterpolationMode>(), py::arg("targetHeight"),
         py::arg("targetWidth") = ResizeOp::kDefWidth, py::arg("interpolation") = ResizeOp::kDefInterpolation);

+  (void)py::class_<ResizeWithBBoxOp, TensorOp, std::shared_ptr<ResizeWithBBoxOp>>(
+    *m, "ResizeWithBBoxOp", "Tensor operation to resize an image. Takes height, width and mode.")
+    .def(py::init<int32_t, int32_t, InterpolationMode>(), py::arg("targetHeight"),
+         py::arg("targetWidth") = ResizeWithBBoxOp::kDefWidth,
+         py::arg("interpolation") = ResizeWithBBoxOp::kDefInterpolation);
+
+  (void)py::class_<RandomResizeWithBBoxOp, TensorOp, std::shared_ptr<RandomResizeWithBBoxOp>>(
+    *m, "RandomResizeWithBBoxOp",
+    "Tensor operation to resize an image using a randomly selected interpolation. Takes height and width.")
+    .def(py::init<int32_t, int32_t>(), py::arg("targetHeight"),
+         py::arg("targetWidth") = RandomResizeWithBBoxOp::kDefTargetWidth);
+
  (void)py::class_<UniformAugOp, TensorOp, std::shared_ptr<UniformAugOp>>(
    *m, "UniformAugOp", "Tensor operation to apply random augmentation(s).")
    .def(py::init<std::vector<std::shared_ptr<TensorOp>>, int32_t>(), py::arg("operations"),
--- a/mindspore/ccsrc/dataset/core/config_manager.cc
+++ b/mindspore/ccsrc/dataset/core/config_manager.cc
@ -41,6 +41,7 @@ Status ConfigManager::FromJson(const nlohmann::json &j) {
  set_worker_connector_size(j.value("workerConnectorSize", worker_connector_size_));
  set_op_connector_size(j.value("opConnectorSize", op_connector_size_));
  set_seed(j.value("seed", seed_));
+  set_monitor_sampling_interval(j.value("monitorSamplingInterval", monitor_sampling_interval_));
  return Status::OK();
 }

--- a/mindspore/ccsrc/dataset/core/data_type.cc
+++ b/mindspore/ccsrc/dataset/core/data_type.cc
@ -18,7 +18,6 @@
 #include "utils/log_adapter.h"

 #include "dataset/core/pybind_support.h"
-#include "dataset/util/de_error.h"

 namespace mindspore {
 namespace dataset {
--- a/mindspore/ccsrc/dataset/core/tensor.cc
+++ b/mindspore/ccsrc/dataset/core/tensor.cc
@ -152,7 +152,7 @@ Tensor::Tensor(const std::vector<std::string> &strings, const TensorShape &shape

  this->data_end_ = data_ + offset_arr[i];

-  DS_ASSERT(num_bytes == 0);
+  MS_ASSERT(num_bytes == 0);
  if (shape.known()) Tensor::Reshape(shape);
 }
 Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape)
@ -191,7 +191,7 @@ Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape

  data_end_ = data_ + offset_arr[i];

-  DS_ASSERT(num_bytes == 0);
+  MS_ASSERT(num_bytes == 0);
  if (shape.known()) Tensor::Reshape(shape);
 }
 Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl, const TensorShape &shape,
@ -420,7 +420,7 @@ bool Tensor::operator==(const Tensor &rhs) const {
 // Description: A function that print the value as specified by its index
 void Tensor::PrintItemAt(const std::vector<dsize_t> &index, std::ostream &out) const {
  Status rc;
-  DS_ASSERT(data_);
+  MS_ASSERT(data_);

  switch (type_.value()) {
    CASE_PRINT_HEX(DataType::DE_BOOL, bool);
--- a/mindspore/ccsrc/dataset/core/tensor.h
+++ b/mindspore/ccsrc/dataset/core/tensor.h
@ -33,7 +33,6 @@
 #include "dataset/core/data_type.h"
 #include "dataset/core/tensor_shape.h"
 #include "dataset/util/allocator.h"
-#include "dataset/util/de_error.h"
 #include "dataset/util/status.h"
 #include "proto/example.pb.h"

--- a/mindspore/ccsrc/dataset/core/tensor_shape.cc
+++ b/mindspore/ccsrc/dataset/core/tensor_shape.cc
@ -22,7 +22,6 @@
 #include "common/utils.h"
 #include "utils/log_adapter.h"
 #include "dataset/core/constants.h"
-#include "dataset/util/de_error.h"

 namespace mindspore {
 namespace dataset {
--- a/mindspore/ccsrc/dataset/engine/connector.h
+++ b/mindspore/ccsrc/dataset/engine/connector.h
@ -97,13 +97,15 @@ class Connector {
  virtual Status Pop(int32_t worker_id,  // The worker-id of the caller. See the requirement at the top of this file.
                     T *result) noexcept {
    {
-      DS_ASSERT(worker_id < num_consumers_);
+      MS_ASSERT(worker_id < num_consumers_);
      std::unique_lock<std::mutex> lk(m_);
      RETURN_IF_NOT_OK(cv_.Wait(&lk, [this, worker_id]() { return expect_consumer_ == worker_id; }));
      RETURN_IF_NOT_OK(queues_[pop_from_]->PopFront(result));
      pop_from_ = (pop_from_ + 1) % num_producers_;
+      out_buffers_count_++;
      expect_consumer_ = (expect_consumer_ + 1) % num_consumers_;
    }
+
    cv_.NotifyAll();
    return Status::OK();
  }
@ -114,19 +116,21 @@ class Connector {
  // @param worker_id The id of a worker thread calling this method.
  // @param el A const lvalue element to be passed/added/pushed.
  Status Push(int32_t worker_id, const T &el) noexcept {
-    DS_ASSERT(worker_id < static_cast<int32_t>(queues_.size()));
-    DS_ASSERT(queues_[worker_id] != nullptr);
+    MS_ASSERT(worker_id < static_cast<int32_t>(queues_.size()));
+    MS_ASSERT(queues_[worker_id] != nullptr);
    return (queues_[worker_id]->Add(el));
  }

+  auto out_buffers_count() const { return out_buffers_count_.load(); }
+
  // Add an element into the DbConnector without the overhead of synchronization.
  // It may block when the internal queue is full.
  // The element passed to this function will be forwarded into the internal queue.
  // @param worker_id The id of a worker thread calling this method.
  // @param el An element to be passed/added/pushed.
  virtual Status Push(int32_t worker_id, T &&el) noexcept {
-    DS_ASSERT(worker_id < static_cast<int32_t>(queues_.size()));
-    DS_ASSERT(queues_[worker_id] != nullptr);
+    MS_ASSERT(worker_id < static_cast<int32_t>(queues_.size()));
+    MS_ASSERT(queues_[worker_id] != nullptr);
    return (queues_[worker_id]->Add(std::forward<T>(el)));
  }

@ -138,6 +142,7 @@ class Connector {
    }
    expect_consumer_ = 0;
    pop_from_ = 0;
+    out_buffers_count_ = 0;
    MS_LOG(DEBUG) << "Connector counters reset.";
  }

@ -198,6 +203,7 @@ class Connector {
  // Used in the Pop(), when a thread call pop() but it is not the expect_consumer_.
  std::mutex m_;
  CondVar cv_;
+  std::atomic<std::int64_t> out_buffers_count_ = 0;
 };
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/data_schema.cc
+++ b/mindspore/ccsrc/dataset/engine/data_schema.cc
@ -27,7 +27,6 @@
 #include "dataset/util/status.h"
 #include "dataset/core/tensor_shape.h"
 #include "utils/log_adapter.h"
-#include "dataset/util/de_error.h"

 namespace mindspore {
 namespace dataset {
@ -184,35 +183,7 @@ TensorShape ColDescriptor::shape() const {
 const char DataSchema::DEFAULT_DATA_SCHEMA_FILENAME[] = "datasetSchema.json";

 // Constructor 1: Simple constructor that leaves things uninitialized.
-DataSchema::DataSchema() : dataset_type_(DatasetType::kUnknown), num_rows_(0) {}
-
-DatasetType DataSchema::GetDatasetTYpeFromString(const std::string &type) const {
-  // Convert the string to a more easy to manage enum flavour of the buffer type.
-  if (type == "ARROW") {
-    return DatasetType::kArrow;
-  } else if (type == "TF") {
-    return DatasetType::kTf;
-  } else {
-    return DatasetType::kUnknown;
-  }
-}
-
-Status DataSchema::LoadDatasetType(const std::string &schema_file_path) {
-  try {
-    std::ifstream in(schema_file_path);
-    nlohmann::json js;
-    in >> js;
-    // First, get the column for the type of dataset.
-    dataset_type_str_ = js.value("datasetType", "");
-    dataset_type_ = GetDatasetTYpeFromString(dataset_type_str_);
-    dir_structure_ = js.value("directoryStructure", "");
-  }
-  // Catch any exception and convert to Status return code
-  catch (const std::exception &err) {
-    RETURN_STATUS_UNEXPECTED("Schema file failed to load");
-  }
-  return Status::OK();
-}
+DataSchema::DataSchema() : num_rows_(0) {}

 // Internal helper function. Parses the json schema file in any order and produces a schema that
 // does not follow any particular order (json standard does not enforce any ordering protocol).
@ -400,8 +371,6 @@ Status DataSchema::LoadSchemaString(const std::string &schema_json_string,
    nlohmann::json js = nlohmann::json::parse(schema_json_string);
    RETURN_IF_NOT_OK(PreLoadExceptionCheck(js));
    num_rows_ = js.value("numRows", 0);
-    dataset_type_str_ = js.value("datasetType", "");
-    dataset_type_ = GetDatasetTYpeFromString(dataset_type_str_);
    nlohmann::json column_tree = js.at("columns");
    if (column_tree.empty()) {
      RETURN_STATUS_UNEXPECTED("columns is null");
@ -425,22 +394,16 @@ DataSchema::~DataSchema() = default;

 // Getter for the ColDescriptor by index
 const ColDescriptor &DataSchema::column(int32_t idx) const {
-  DS_ASSERT(idx < static_cast<int>(col_descs_.size()));
+  MS_ASSERT(idx < static_cast<int>(col_descs_.size()));
  return col_descs_[idx];
 }

 // A print method typically used for debugging
 void DataSchema::Print(std::ostream &out) const {
-  out << "Dataset type string : (";
-  if (dataset_type_str_.empty()) {
-    out << "none specified)\n";
-  } else {
-    out << dataset_type_str_ << ")\n";
-  }
+  out << "Dataset schema: (";
  for (const auto &col_desc : col_descs_) {
    out << col_desc << "\n";
  }
-  out << "Dataset type: " << static_cast<uint32_t>(dataset_type_) << "\n";
 }

 // Adds a column descriptor to the schema
--- a/mindspore/ccsrc/dataset/engine/data_schema.h
+++ b/mindspore/ccsrc/dataset/engine/data_schema.h
@ -30,196 +30,176 @@

 namespace mindspore {
 namespace dataset {
-// A simple class to provide meta info about a column.
+/// \class ColDescriptor data_schema.h
+/// \brief A simple class to provide meta info about a column.
 class ColDescriptor {
 public:
-  // Constructor 1: Simple constructor that leaves things uninitialized.
+  /// \brief Constructor 1: Simple constructor that leaves things uninitialized.
  ColDescriptor();

-  // Constructor 2: Main constructor
-  // @param col_name - The name of the column
-  // @param col_type - The DE Datatype of the column
-  // @param tensor_impl - The (initial) type of tensor implementation for the column
-  // @param rank - The number of dimension of the data
-  // @param in_shape - option argument for input shape
+  /// \brief Constructor 2: Main constructor
+  /// \param[in] col_name - The name of the column
+  /// \param[in] col_type - The DE Datatype of the column
+  /// \param[in] tensor_impl - The (initial) type of tensor implementation for the column
+  /// \param[in] rank - The number of dimension of the data
+  /// \param[in] in_shape - option argument for input shape
  ColDescriptor(const std::string &col_name, DataType col_type, TensorImpl tensor_impl, int32_t rank,
                const TensorShape *in_shape = nullptr);

-  // Explicit copy constructor is required
-  // @param in_cd - the source ColDescriptor
+  /// \brief Explicit copy constructor is required
+  /// \param[in] in_cd - the source ColDescriptor
  ColDescriptor(const ColDescriptor &in_cd);

-  // Assignment overload
-  // @param in_cd - the source ColDescriptor
+  /// \brief Assignment overload
+  /// \param[in] in_cd - the source ColDescriptor
  ColDescriptor &operator=(const ColDescriptor &in_cd);

-  // Destructor
+  /// \brief Destructor
  ~ColDescriptor();

-  // A print method typically used for debugging
-  // @param out - The output stream to write output to
+  /// \brief A print method typically used for debugging
+  /// \param[in] out - The output stream to write output to
  void Print(std::ostream &out) const;

-  // Given a number of elements, this function will compute what the actual Tensor shape would be.
-  // If there is no starting TensorShape in this column, or if there is a shape but it contains
-  // an unknown dimension, then the output shape returned shall resolve dimensions as needed.
-  // @param num_elements - The number of elements in the data for a Tensor
-  // @param out_shape - The materialized output Tensor shape
-  // @return Status - The error code return
+  /// \brief Given a number of elements, this function will compute what the actual Tensor shape would be.
+  ///     If there is no starting TensorShape in this column, or if there is a shape but it contains
+  ///     an unknown dimension, then the output shape returned shall resolve dimensions as needed.
+  /// \param[in] num_elements - The number of elements in the data for a Tensor
+  /// \param[inout] out_shape - The materialized output Tensor shape
+  /// \return Status - The error code return
  Status MaterializeTensorShape(int32_t num_elements, TensorShape *out_shape) const;

-  // << Stream output operator overload
-  // @notes This allows you to write the debug print info using stream operators
-  // @param out - reference to the output stream being overloaded
-  // @param cd - reference to the ColDescriptor to display
-  // @return - the output stream must be returned
+  /// \brief << Stream output operator overload
+  ///     This allows you to write the debug print info using stream operators
+  /// \param[in] out - reference to the output stream being overloaded
+  /// \param[in] cd - reference to the ColDescriptor to display
+  /// \return - the output stream must be returned
  friend std::ostream &operator<<(std::ostream &out, const ColDescriptor &cd) {
    cd.Print(out);
    return out;
  }

-  // getter function
-  // @return The column's DataType
+  /// \brief getter function
+  /// \return The column's DataType
  DataType type() const { return type_; }

-  // getter function
-  // @return The column's rank
+  /// \brief getter function
+  /// \return The column's rank
  int32_t rank() const { return rank_; }

-  // getter function
-  // @return The column's name
+  /// \brief getter function
+  /// \return The column's name
  std::string name() const { return col_name_; }

-  // getter function
-  // @return The column's shape
+  /// \brief getter function
+  /// \return The column's shape
  TensorShape shape() const;

-  // getter function
-  // @return TF if the column has an assigned fixed shape.
+  /// \brief getter function
+  /// \return True if the column has an assigned fixed shape, false otherwise.
  bool hasShape() const { return tensor_shape_ != nullptr; }

-  // getter function
-  // @return The column's tensor implementation type
+  /// \brief getter function
+  /// \return The column's tensor implementation type
  TensorImpl tensorImpl() const { return tensor_impl_; }

 private:
  DataType type_;                              // The columns type
  int32_t rank_;                               // The rank for this column (number of dimensions)
-  TensorImpl tensor_impl_;                     // The initial flavour of the tensor for this column.
+  TensorImpl tensor_impl_;                     // The initial flavour of the tensor for this column
  std::unique_ptr<TensorShape> tensor_shape_;  // The fixed shape (if given by user)
  std::string col_name_;                       // The name of the column
 };

-// A list of the columns.
+/// \class DataSchema data_schema.h
+/// \brief A list of the columns.
 class DataSchema {
 public:
-  // Constructor
+  /// \brief Constructor
  DataSchema();

-  // Destructor
+  /// \brief Destructor
  ~DataSchema();

-  // Populates the schema with a dataset type from a json file.  It does not populate any of the
-  // column info. To populate everything, use loadSchema() afterwards.
-  // @param schema_file_path - Absolute path to the schema file to use for getting dataset type info.
-  Status LoadDatasetType(const std::string &schema_file_path);
-
-  // Parses a schema json file and populates the columns and meta info.
-  // @param schema_file_path - the schema file that has the column's info to load
-  // @param columns_to_load - list of strings for columns to load. if empty, assumes all columns.
-  // @return Status - The error code return
+  /// \brief Parses a schema json file and populates the columns and meta info.
+  /// \param[in] schema_file_path - the schema file that has the column's info to load
+  /// \param[in] columns_to_load - list of strings for columns to load. if empty, assumes all columns.
+  /// \return Status - The error code return
  Status LoadSchemaFile(const std::string &schema_file_path, const std::vector<std::string> &columns_to_load);

-  // Parses a schema JSON string and populates the columns and meta info.
-  // @param schema_json_string - the schema file that has the column's info to load
-  // @param columns_to_load - list of strings for columns to load. if empty, assumes all columns.
-  // @return Status - The error code return
+  /// \brief Parses a schema JSON string and populates the columns and meta info.
+  /// \param[in] schema_json_string - the schema file that has the column's info to load
+  /// \param[in] columns_to_load - list of strings for columns to load. if empty, assumes all columns.
+  /// \return Status - The error code return
  Status LoadSchemaString(const std::string &schema_json_string, const std::vector<std::string> &columns_to_load);

-  // A print method typically used for debugging
-  // @param out - The output stream to write output to
+  /// \brief A print method typically used for debugging
+  /// \param[in] out - The output stream to write output to
  void Print(std::ostream &out) const;

-  // << Stream output operator overload
-  // @notes This allows you to write the debug print info using stream operators
-  // @param out - reference to the output stream being overloaded
-  // @param ds - reference to the DataSchema to display
-  // @return - the output stream must be returned
+  /// \brief << Stream output operator overload. This allows you to write the debug print info using stream operators
+  /// \param[in] out - reference to the output stream being overloaded
+  /// \param[in] ds - reference to the DataSchema to display
+  /// \return - the output stream must be returned
  friend std::ostream &operator<<(std::ostream &out, const DataSchema &ds) {
    ds.Print(out);
    return out;
  }

-  // Adds a column descriptor to the schema
-  // @param cd - The ColDescriptor to add
-  // @return Status - The error code return
+  /// \brief Adds a column descriptor to the schema
+  /// \param[in] cd - The ColDescriptor to add
+  /// \return Status - The error code return
  Status AddColumn(const ColDescriptor &cd);

-  // Setter
-  // @param in_type - The Dataset type to set into the schema
-  void set_dataset_type(DatasetType in_type) { dataset_type_ = in_type; }
-
-  // getter
-  // @return The dataset type of the schema
-  DatasetType dataset_type() const { return dataset_type_; }
-
-  // getter
-  // @return The reference to a ColDescriptor to get (const version)
+  /// \brief getter
+  /// \return The reference to a ColDescriptor to get (const version)
  const ColDescriptor &column(int32_t idx) const;

-  // getter
-  // @return The number of columns in the schema
+  /// \brief getter
+  /// \return The number of columns in the schema
  int32_t NumColumns() const { return col_descs_.size(); }

  bool Empty() const { return NumColumns() == 0; }

-  std::string dir_structure() const { return dir_structure_; }
-
-  std::string dataset_type_str() const { return dataset_type_str_; }
-
+  /// \brief getter
+  /// \return The number of rows read from schema
  int64_t num_rows() const { return num_rows_; }

  static const char DEFAULT_DATA_SCHEMA_FILENAME[];

-  // Loops through all columns in the schema and returns a map with the column
-  // name to column index number.
-  // @param out_column_name_map - The output map of columns names to column index
-  // @return Status - The error code return
+  /// \brief Loops through all columns in the schema and returns a map with the column name to column index number.
+  /// \param[inout] out_column_name_map - The output map of columns names to column index
+  /// \return Status - The error code return
  Status GetColumnNameMap(std::unordered_map<std::string, int32_t> *out_column_name_map);

 private:
-  // Internal helper function. Parses the json schema file in any order and produces a schema that
-  // does not follow any particular order (json standard does not enforce any ordering protocol).
-  // This one produces a schema that contains all of the columns from the schema file.
-  // @param column_tree - The nlohmann tree from the json file to parse
-  // @return Status - The error code return
+  /// \brief Internal helper function. Parses the json schema file in any order and produces a schema that
+  ///     does not follow any particular order (json standard does not enforce any ordering protocol).
+  ///     This one produces a schema that contains all of the columns from the schema file.
+  /// \param[in] column_tree - The nlohmann tree from the json file to parse
+  /// \return Status - The error code return
  Status AnyOrderLoad(nlohmann::json column_tree);

-  // Internal helper function. For each input column name, perform a lookup to the json document to
-  // find the matching column.  When the match is found, process that column to build the column
-  // descriptor and add to the schema in the order in which the input column names are given.
-  // @param column_tree - The nlohmann tree from the json file to parse
-  // @param columns_to_load - list of strings for the columns to add to the schema
-  // @return Status - The error code return
+  /// \brief Internal helper function. For each input column name, perform a lookup to the json document to
+  ///     find the matching column.  When the match is found, process that column to build the column
+  ///     descriptor and add to the schema in the order in which the input column names are given.
+  /// \param[in] column_tree - The nlohmann tree from the json file to parse
+  /// \param[in] columns_to_load - list of strings for the columns to add to the schema
+  /// \return Status - The error code return
  Status ColumnOrderLoad(nlohmann::json column_tree, const std::vector<std::string> &columns_to_load);

-  // Internal helper function. Given the json tree for a given column, load it into our schema.
-  // @param columnTree - The nlohmann child tree for a given column to load.
-  // @param col_name - The string name of the column for that subtree.
-  // @return Status - The error code return
+  /// \brief Internal helper function. Given the json tree for a given column, load it into our schema.
+  /// \param[in] column_child_tree - The nlohmann child tree for a given column to load.
+  /// \param[in] col_name - The string name of the column for that subtree.
+  /// \return Status - The error code return
  Status ColumnLoad(nlohmann::json column_child_tree, const std::string &col_name);

-  // Internal helper function. Performs sanity checks on the json file setup.
-  // @param js - The nlohmann tree for the schema file
-  // @return Status - The error code return
+  /// \brief Internal helper function. Performs sanity checks on the json file setup.
+  /// \param[in] js - The nlohmann tree for the schema file
+  /// \return Status - The error code return
  Status PreLoadExceptionCheck(const nlohmann::json &js);

-  DatasetType GetDatasetTYpeFromString(const std::string &type) const;
-
  std::vector<ColDescriptor> col_descs_;  // Vector of column descriptors
-  std::string dataset_type_str_;          // A string that represents the type of dataset
-  DatasetType dataset_type_;              // The numeric form of the dataset type from enum
-  std::string dir_structure_;             // Implicit or flatten
  int64_t num_rows_;
 };
 }  // namespace dataset
--- a/mindspore/ccsrc/dataset/engine/dataset_iterator.cc
+++ b/mindspore/ccsrc/dataset/engine/dataset_iterator.cc
@ -27,7 +27,7 @@
 namespace mindspore {
 namespace dataset {
 // Constructor of the IteratorBase
-IteratorBase::IteratorBase() : curr_buffer_(nullptr), eof_handled_(false), first_row_(true) {}
+IteratorBase::IteratorBase() : curr_buffer_(nullptr), eof_handled_(false) {}

 IteratorBase::~IteratorBase() = default;

@ -51,13 +51,10 @@ Status IteratorBase::GetNextAsMap(TensorMap *out_map) {
  // The column name mapping comes from the source operator that is producing the data into the iterator.
  // To avoid having to fetch this for every time, we'll take a local copy of the column name id mapping
  // and save in the iterator.  We only have to do this once.  All subsequent iterations use the same mapping.
-  // Note: This can only be done after the first row has been produced, as this guarantees the the child has
-  // it's column mapping set up.
-  if (first_row_) {
+  if (col_name_id_map_.empty()) {
    // Determine the column name map by calling the derived class method to retrieve the column
    // name map
    col_name_id_map_ = this->GetColumnNameMap();
-    first_row_ = false;
  }

  // Populate the out map from the row and return it
--- a/mindspore/ccsrc/dataset/engine/dataset_iterator.h
+++ b/mindspore/ccsrc/dataset/engine/dataset_iterator.h
@ -72,7 +72,6 @@ class IteratorBase {
 protected:
  std::unique_ptr<DataBuffer> curr_buffer_;  // holds the current buffer
  bool eof_handled_;                         // T/F if this op got an eof
-  bool first_row_;                           // internal tracking for first row case
  std::unordered_map<std::string, int32_t> col_name_id_map_;
 };

--- a/mindspore/ccsrc/dataset/engine/datasetops/barrier_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/barrier_op.cc
@ -144,9 +144,6 @@ Status BarrierOp::prepare(TensorQTable *const table) {

  table->push_back(std::move(new_row));

-  // Assign the column name id map
-  RETURN_IF_NOT_OK(DatasetOp::AssignColMapFromChild());
-
  // the update code below shouldn't do anything bad if the column name already exists.
  return Status::OK();
 }
--- a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc
@ -76,7 +76,6 @@ Status BatchOp::operator()() {
  std::unique_ptr<TensorQTable> table = std::make_unique<TensorQTable>();
  child_iterator_ = std::make_unique<ChildIterator>(this, 0, 0);
  RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
-  RETURN_IF_NOT_OK(DatasetOp::AssignColMapFromChild());  // must come after the first fetch above
  int32_t cur_batch_size = 0;
  RETURN_IF_NOT_OK(GetBatchSize(&cur_batch_size, CBatchInfo(0, 0, 0)));
  while (child_iterator_->eof_handled() == false) {
@ -410,7 +409,7 @@ Status BatchOp::UnpackPadInfo(const PadInfo &pad_info,
 // Visitor accept method for NodePass
 Status BatchOp::Accept(NodePass *p, bool *modified) {
  // Downcast shared pointer then call visitor
-  return p->RunOnNode(std::static_pointer_cast<BatchOp>(shared_from_this()), modified);
+  return p->RunOnNode(shared_from_base<BatchOp>(), modified);
 }

 }  // namespace dataset
--- a/mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.cc
@ -115,7 +115,6 @@ Status BucketBatchByLengthOp::operator()() {
  TensorRow current_row;
  child_iterator_ = std::make_unique<ChildIterator>(this, 0, 0);
  RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&current_row));
-  RETURN_IF_NOT_OK(AssignColMapFromChild());
  while (!child_iterator_->eof_handled()) {
    while (!current_row.empty()) {
      int32_t element_length;
--- a/mindspore/ccsrc/dataset/engine/datasetops/build_vocab_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/build_vocab_op.cc
@ -86,7 +86,6 @@ Status BuildVocabOp::operator()() {
  child_iterator_ = std::make_unique<ChildIterator>(this, 0, 0);
  TensorRow new_row;
  RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
-  RETURN_IF_NOT_OK(AssignColMapFromChild());
  if (!col_names_.empty()) {
    col_ids_.reserve(col_names_.size());
    for (std::string col : col_names_) {
--- a/mindspore/ccsrc/dataset/engine/datasetops/concat_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/concat_op.cc
@ -66,12 +66,6 @@ Status ConcatOp::operator()() {
  std::unique_ptr<DataBuffer> buf;
  RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf));

-  // Obtain columns_name_id_map from child_[0]
-  column_name_id_map_ = child_[0]->column_name_id_map();
-  if (column_name_id_map_.empty()) {
-    RETURN_STATUS_UNEXPECTED("Child column name map cannot be empty!");
-  }
-
  int eof_count = 0;
  while (eof_count != children_num_) {
    for (int i = 0; i < children_num_; i++) {
@ -115,17 +109,13 @@ Status ConcatOp::Verify(int32_t id, const std::unique_ptr<DataBuffer> &buf) {
  buf->GetRow(0, &new_row);

  if (id == 0) {
-    // Obtain the column name, data type and data rank in child[0]
-    column_name_id_ = child_[id]->column_name_id_map();
+    // Obtain the data type and data rank in child[0]
    for (auto item : new_row) {
      data_type_.push_back(item->type());
      data_rank_.push_back(item->Rank());
    }
  } else {
-    // Compare the column name, data type and data rank with these in child[0]
-    if (child_[id]->column_name_id_map() != column_name_id_) {
-      RETURN_STATUS_UNEXPECTED("The column name or column order is not the same with previous dataset.");
-    }
+    // Compare the data type and data rank with these in child[0]
    int32_t index = 0;
    for (auto item : new_row) {
      if ((item->type() != data_type_[index]) || item->Rank() != data_rank_[index++]) {
@ -138,7 +128,27 @@ Status ConcatOp::Verify(int32_t id, const std::unique_ptr<DataBuffer> &buf) {

 Status ConcatOp::PrepareNodePostAction() {
  RETURN_IF_NOT_OK(PipelineOp::PrepareNodePostAction());
-  tree_->AddToRepeatStack(shared_from_this());
+  tree_->AddToEOEOpStack(shared_from_this());
+  return Status::OK();
+}
+
+// We need to override the super class ComputeColMap here because the number of children is more than 1.
+Status ConcatOp::ComputeColMap() {
+  if (column_name_id_map_.empty()) {
+    // Obtain columns_name_id_map from child_[0]
+    column_name_id_map_ = child_[0]->column_name_id_map();
+    if (column_name_id_map_.empty()) {
+      RETURN_STATUS_UNEXPECTED("Child column name map cannot be empty!");
+    }
+    // Verify all children have the same column name map
+    for (int32_t i = 0; i < child_.size(); ++i) {
+      if (child_[i]->column_name_id_map() != column_name_id_map_) {
+        RETURN_STATUS_UNEXPECTED("The column name or column order is not the same with previous dataset.");
+      }
+    }
+  } else {
+    MS_LOG(WARNING) << "Column name map is already set!";
+  }
  return Status::OK();
 }
 }  // namespace dataset
--- a/mindspore/ccsrc/dataset/engine/datasetops/concat_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/concat_op.h
@ -85,6 +85,10 @@ class ConcatOp : public PipelineOp {
  // @return Name of the current Op
  std::string Name() const override { return "ConcatOp"; }

+  // Function for computing the assignment of the column name map.
+  // @return - Status
+  Status ComputeColMap() override;
+
 private:
  Status Verify(int32_t id, const std::unique_ptr<DataBuffer> &buf);

--- a/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc
@ -18,29 +18,31 @@
 #include <iomanip>
 #include <iostream>
 #include <memory>
+#include <regex>
 #include <utility>
 #include <string>
 #include <algorithm>

 #include "dataset/engine/execution_tree.h"
 #include "dataset/engine/datasetops/device_queue_op.h"
+#include "dataset/engine/datasetops/source/sampler/sampler.h"
 #include "dataset/engine/data_buffer.h"
 #include "dataset/engine/db_connector.h"
 #include "dataset/engine/opt/pass.h"
-
+#include "utils/system/crc32c.h"
 #include "utils/log_adapter.h"

 namespace mindspore {
 namespace dataset {
 // Constructor
-DatasetOp::DatasetOp(int32_t op_connector_size)
+DatasetOp::DatasetOp(int32_t op_connector_size, std::shared_ptr<Sampler> sampler)
    : oc_queue_size_(op_connector_size),
+      sampler_(sampler),
      operator_id_(kInvalidOperatorId),
      tree_(nullptr),
      state_(OpState::kDeOpIdle),
      op_ctrl_flags_(kDeOpNone),
-      out_connector_(nullptr),
-      first_fetch_(true) {
+      out_connector_(nullptr) {
  // The operator starts out with an invalid operator id.  The only way to
  // get it out of invalid state is to assign the operator to an execution tree.
 }
@ -105,13 +107,58 @@ Status DatasetOp::InsertAsParent(std::shared_ptr<DatasetOp> to_add) {
 void DatasetOp::AddParent(DatasetOp *parent) { parent_.push_back(parent); }

 // Removes a parent operator from this operator
+// Every entry of parent_ equal to the given pointer is erased; nothing happens if it is not present.
-void DatasetOp::RemoveParent(DatasetOp *parent) {
+void DatasetOp::RemoveParent(const DatasetOp *parent) {
  parent_.erase(std::remove(parent_.begin(), parent_.end(), parent), parent_.end());
 }

+// Removes this node from the tree and connects its parent and child together.
+// Only ops with at most one parent and at most one child can be removed.  All checks run before
+// the tree is modified, so an error return leaves the tree untouched.
+Status DatasetOp::Remove() {
+  if (parent_.size() > 1) {
+    std::string err_msg("No support for op removal if the operator has more than one parent");
+    RETURN_STATUS_UNEXPECTED(err_msg);
+  }
+  if (child_.size() > 1) {
+    std::string err_msg("No support for op removal if the operator has more than one child");
+    RETURN_STATUS_UNEXPECTED(err_msg);
+  }
+
+  // Work on local copies of the links.  Relinking below can release a shared_ptr that refers to
+  // this op, so no member of this op is read once the tree starts being modified.
+  DatasetOp *const self = this;
+  DatasetOp *const parent = parent_.empty() ? nullptr : parent_[0];
+  ExecutionTree *const tree = tree_;
+  std::shared_ptr<DatasetOp> child;
+  if (!child_.empty()) {
+    child = child_[0];
+  }
+
+  // Locate this op's own slot in the parent's child list.  The parent may have other children,
+  // so the slot is searched for rather than assumed to be index 0.
+  std::vector<std::shared_ptr<DatasetOp>>::iterator parent_slot;
+  if (parent != nullptr) {
+    parent_slot = std::find_if(parent->child_.begin(), parent->child_.end(),
+                               [self](const std::shared_ptr<DatasetOp> &op) { return op.get() == self; });
+    if (parent_slot == parent->child_.end()) {
+      RETURN_STATUS_UNEXPECTED("Op removal failed: the operator is not registered as a child of its parent");
+    }
+  }
+
+  if (child != nullptr) {
+    if (parent != nullptr) {
+      // The child's back-link to this op is what gets redirected to the parent below.
+      if (std::find(child->parent_.begin(), child->parent_.end(), self) == child->parent_.end()) {
+        RETURN_STATUS_UNEXPECTED("Op removal failed: the operator is not registered as a parent of its child");
+      }
+    } else if (tree == nullptr) {
+      // Without a parent the child has to become the new root, which requires a tree.
+      RETURN_STATUS_UNEXPECTED("Op removal failed: the operator is not assigned to an execution tree");
+    }
+  }
+
+  // Scenarios when removing this op (written parent -> this -> child):
+  //   parent -> this -> child : child is linked directly to parent
+  //   this -> child           : this op had no parent; child takes over its place as root
+  //   parent -> this          : this op is simply dropped from parent's child list
+  //
+  // First fix up the child's parent link.
+  if (child != nullptr) {
+    if (parent != nullptr) {
+      // Replace the child's pointer to this op with a pointer to our parent.
+      std::replace(child->parent_.begin(), child->parent_.end(), self, parent);
+    } else {
+      // We don't have a parent, so we are the root node being removed.
+      // Clear the parent list of our child so that it becomes the new root.
+      child->parent_.clear();
+      tree->AssignRoot(child);
+    }
+  }
+
+  // Next, if we had a parent, fix up its child list.
+  if (parent != nullptr) {
+    if (child != nullptr) {
+      // Our child takes over the slot this op occupied in the parent.
+      *parent_slot = child;
+    } else {
+      // We don't have a child, so only our own slot is removed; any siblings stay in place.
+      parent->child_.erase(parent_slot);
+    }
+  }
+
+  return Status::OK();
+}
+
 // Getter function to get a shared pointer to our child at the given index.
+// child_index must be a valid index into child_; the assert below checks only the upper bound.
 std::shared_ptr<DatasetOp> DatasetOp::child(int32_t child_index) const {
-  DS_ASSERT(child_index < static_cast<int>(child_.size()));
+  MS_ASSERT(child_index < static_cast<int>(child_.size()));
  // Return a shared pointer
  return child_[child_index];
 }
@ -151,6 +198,9 @@ void DatasetOp::Print(std::ostream &out, bool show_all) const {
    }
    out << "\nConnector queue size   : " << oc_queue_size_ << "\nOperator control flags : 0x" << std::hex
        << std::setw(8) << std::setfill('0') << op_ctrl_flags_ << std::dec << std::setfill(' ');
+    if (sampler_) {
+      sampler_->Print(out, show_all);
+    }
  }
 }

@ -223,11 +273,10 @@ Status DatasetOp::PrepareNodePreAction() {
 Status DatasetOp::PrepareNodePostAction() {
  // If this op does not have any children and it is in a repeat path of the tree...
  if (child_.empty() && BitTest(op_ctrl_flags_, kDeOpRepeated)) {
-    // push ourselves onto the tree repeat stack.  Later, the repeat operator
+    // push ourselves onto the eoe operator stack.  Later, a repeat/epoch ctrl operator
    // above us will consume them.
-    tree_->AddToRepeatStack(shared_from_this());
+    tree_->AddToEOEOpStack(shared_from_this());
  }
-
  // Creating Connector object for each op.
  // The consumer of the root node is assumed to be one thread.
  // If multiple threads are consuming from the root node, they will get the ordered data in round robin fashion.
@ -240,6 +289,10 @@ Status DatasetOp::PrepareNodePostAction() {
    RETURN_IF_NOT_OK(out_connector_->Register(tree_->AllTasks()));
  }
  RETURN_IF_NOT_OK(this->RegisterWorkerConnectors());
+
+  // Generate the column name map for the current op.
+  RETURN_IF_NOT_OK(this->ComputeColMap());
+
  return Status::OK();
 }

@ -262,38 +315,84 @@ std::string DatasetOp::ColumnNameMapAsString() const {
  return outStr;
 }

-// A helper function for providing assignment of the column name map.
-// This grabs the map from child 0 and assigns it into this op.
-// Can only be used if number of children is 1.
-Status DatasetOp::AssignColMapFromChild() {
+// Computing the assignment of the column name map.
+// This base version inherits the column map from child 0 and can only be used if the number of children is 1.
+// If this op already has a non-empty map, the map is left as it is and a warning is logged.
+// Operations changing the column map must override this function.
+// Returns an error if there is more than one child, if there is no child and no map to fall back on,
+// or if the child's map is empty.
+Status DatasetOp::ComputeColMap() {
  if (child_.size() > 1) {
    RETURN_STATUS_UNEXPECTED("Assigning column name map from child only works for single-child operators.");
  }
-  // Assign the correct column name map to this op by taking it from the input child.
-  // This must be done AFTER the first fetch, but only needs to be done once by the first worker to
-  // do the first fetch.
-  if (first_fetch_) {
-    // If there was a single worker, or this is being called from a master thread in a parallel op,
-    // then the mutex is not really needed here, although it's harmless.
-    std::unique_lock<std::mutex> lock(column_name_map_mutex_);
-    // If the map has not been set up yet, then we are the first one in to set it up. The first_fetch_ (dirty read)
-    // bool allows us to avoid acquiring the lock if the map has already been set.
+  // An op without children has nothing to inherit from.  Fail cleanly here instead of indexing
+  // into an empty child_ vector below.
+  if (child_.empty() && column_name_id_map_.empty()) {
+    RETURN_STATUS_UNEXPECTED("Cannot inherit the column name map: the operator has no child and no map of its own.");
+  }
+  if (column_name_id_map_.empty()) {
+    column_name_id_map_ = child_[0]->column_name_id_map();
    if (column_name_id_map_.empty()) {
-      column_name_id_map_ = child_[0]->column_name_id_map();
-      first_fetch_ = false;
-      if (column_name_id_map_.empty()) {
-        RETURN_STATUS_UNEXPECTED("Child column name map cannot be empty!");
-      }
+      RETURN_STATUS_UNEXPECTED("Child column name map cannot be empty!");
    }
-    MS_LOG(DEBUG) << "Setting column map after first fetch:\n" << DatasetOp::ColumnNameMapAsString();
+    MS_LOG(DEBUG) << "Setting column map:\n" << DatasetOp::ColumnNameMapAsString();
+  } else {
+    MS_LOG(WARNING) << "Column name map is already set!";
  }
  return Status::OK();
 }

+Status DatasetOp::PreAccept(NodePass *p, bool *modified) {
+  // Base-class fallback for the pre-visit: reached only when the derived op does not
+  // provide its own PreAccept.  The op is handed to the pass as a shared_ptr<DatasetOp>.
+  std::shared_ptr<DatasetOp> self = shared_from_this();
+  return p->PreRunOnNode(std::move(self), modified);
+}
+
+// Base-class visit entry point: hands this op, as a shared_ptr<DatasetOp>, to the pass's RunOnNode
+// and returns the Status that call produces.
 Status DatasetOp::Accept(NodePass *p, bool *modified) {
  // DatasetOp is the base class of visitor target.
  // This method will only be called if its derived class does not implement one.
  return p->RunOnNode(shared_from_this(), modified);
 }
+
+// Shared prepare-phase logic for leaf ops that may carry a sampler.
+// random_access_op: true if the caller is a mappable, random access leaf.
+// Returns an error when a non-mappable leaf holds a sampler but the tree is not being prepared
+// with the cache flag set; otherwise returns OK.
+Status DatasetOp::SaveSamplerForCache(bool random_access_op) {
+  const bool non_mappable = !random_access_op;
+
+  if (sampler_ != nullptr) {
+    if (BitTest(tree_->PrepareFlags(), ExecutionTree::kDePrepCache)) {
+      // We sit below a cache op: hand the sampler to the tree's sampler stack so that the cache
+      // above us can pick it up during its own processing.
+      tree_->AddToSamplerStack(sampler_);
+      MS_LOG(INFO) << "Preparing a leaf op: passing sampler up the tree for Cache handling.";
+    } else if (non_mappable) {
+      // A sampler without a cache is only meaningful for mappable leaves, so this combination
+      // is rejected.
+      RETURN_STATUS_UNEXPECTED(
+        "Non-mappable leaf op has a sampler, but it only supports sampling if there is a cache after it in the tree");
+    }
+  }
+
+  // A non-mappable leaf never samples by itself, so whatever sampler it held (already passed on
+  // to the cache above, if any) is dropped from the base class here.
+  if (non_mappable) {
+    sampler_.reset();
+  }
+
+  return Status::OK();
+}
+// Computes a masked CRC32C over the text that the op's tree prints for the given op, after
+// removing the fields that are expected to differ between otherwise identical pipelines.
+uint32_t DatasetOp::GenerateCRC(const std::shared_ptr<DatasetOp> &op) {
+  std::stringstream printed;
+  op->tree_->Print(printed, op);
+  std::string text = printed.str();
+
+  // For each entry, the text from the field label through the end of its line is removed:
+  //  - the operator control flags;
+  //  - the device id (both spellings), to allow cache sharing for a distributed run of the same pipeline;
+  //  - the cache crc and server cache id, which differ between creating a new cache_client and
+  //    re-using the same cache_client later.
+  const char *const ignored_fields[] = {"Operator control flags.*\n", "Device id.*\n", "device_id.*\n",
+                                        "Cache crc.*\n", "Server cache id.*\n"};
+  for (const char *pattern : ignored_fields) {
+    text = std::regex_replace(text, std::regex(pattern), "");
+  }
+
+  return system::Crc32c::GetMaskCrc32cValue(text.c_str(), text.length());
+}
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.h
@ -34,8 +34,10 @@ class DataBuffer;

 class NodePass;

-// The base class DatasetOp is the main tree node.  It is an abstract class, so
-// the actual implementation of the operators will be derived from here.
+class Sampler;
+
+/// \brief The base class DatasetOp is the main tree node.  It is an abstract class, so
+/// the actual implementation of the operators will be derived from here.
 class DatasetOp : public std::enable_shared_from_this<DatasetOp> {
  // Allow execution tree to access internal members
  friend class ExecutionTree;
@ -53,109 +55,114 @@ class DatasetOp : public std::enable_shared_from_this<DatasetOp> {
  // Flags that control operator runtime behaviours
  enum OpState { kDeOpRunning = 0, kDeOpIdle = 1, kDeOpTerminated };

-  // Constructor
-  // @param op_connector_size - The size for the output connector of this operator.
-  explicit DatasetOp(int32_t op_connector_size);
+  /// Constructor
+  /// \param op_connector_size - The size for the output connector of this operator.
+  /// \param sampler - The sampler for the op
+  explicit DatasetOp(int32_t op_connector_size, std::shared_ptr<Sampler> sampler);

-  // Destructor
+  /// Destructor
  virtual ~DatasetOp() { tree_ = nullptr; }

-  // Adds a operator to become our child.
-  // @param child - shared pointer to the child to add.
+  /// Adds a operator to become our child.
+  /// \param child - shared pointer to the child to add.
  Status AddChild(std::shared_ptr<DatasetOp> child);

-  // Remove a operator from our children.
-  // @param child - shared pointer to the child to remove.
+  /// Remove a operator from our children.
+  /// \param child - shared pointer to the child to remove.
  Status RemoveChild(std::shared_ptr<DatasetOp> child);

-  // Getter function to get a shared pointer to our child
-  // @param child_index - An operator can have n children. Indicates choose which child to return.
+  /// \brief Removes this node from the tree and connects its parent/child together.
+  /// \return Status error code returned
+  Status Remove();
+
+  /// \brief Getter function to get a shared pointer to our child
+  /// \param child_index - An operator can have n children. Indicates choose which child to return.
  std::shared_ptr<DatasetOp> child(int32_t child_index) const;

-  // Inserts a operator as the parent current op.
-  // Inserted op will become the sole parent of the current op.
-  // The existing parent of the current op will be transferred to the inserted op.
+  /// \brief Inserts an operator as the parent of the current op.
+  /// Inserted op will become the sole parent of the current op.
+  /// The existing parent of the current op will be transferred to the inserted op.
  Status InsertAsParent(std::shared_ptr<DatasetOp> to_add);

-  // Creates the connector within this operator
-  // @param num_producers - number of threads that write into this connector
-  // @param num_consumers - number of threads that read from this connector
+  /// \brief Creates the connector within this operator
+  /// \param num_producers - number of threads that write into this connector
+  /// \param num_consumers - number of threads that read from this connector
  void CreateConnector(int32_t num_producers, int32_t num_consumers);

-  // A print method typically used for debugging
-  // @param out - The output stream to write output to
-  // @param show_all - A bool to control if you want to show all info or just a summary
+  /// \brief A print method typically used for debugging
+  /// \param out - The output stream to write output to
+  /// \param show_all - A bool to control if you want to show all info or just a summary
  virtual void Print(std::ostream &out, bool show_all) const;

-  // << Stream output operator overload
-  // @notes This allows you to write the debug print info using stream operators
-  // @param out - reference to the output stream being overloaded
-  // @param dO - reference to the DatasetOp to display
-  // @return - the output stream must be returned
+  /// \brief << Stream output operator overload
+  /// \notes This allows you to write the debug print info using stream operators
+  /// \param out - reference to the output stream being overloaded
+  /// \param dO - reference to the DatasetOp to display
+  /// \return - the output stream must be returned
  friend std::ostream &operator<<(std::ostream &out, const DatasetOp &dO) {
    dO.Print(out, false);
    return out;
  }

-  // Class functor operator ().
-  // DatasetOps operate by launching a thread (see ExecutionTree).
-  // This pure virtual version makes the requirement that derived classes must provide a functor
-  // that will execute their main runtime loop code.
-  // @return Status - The error code return
+  /// \brief Class functor operator ().
+  /// DatasetOps operate by launching a thread (see ExecutionTree).
+  /// This pure virtual version makes the requirement that derived classes must provide a functor
+  /// that will execute their main runtime loop code.
+  /// \return Status - The error code return
  virtual Status operator()() = 0;

-  // Gets the next buffer from the given child
-  // @notes See GetNextInput for similar function that has built-in message handling
-  // @param p_buffer - The shared pointer for the fetched buffer to return (by reference)
-  // @param worker_id - The worker id
-  // @return Status - The error code return
+  /// \brief Gets the next buffer from the given child
+  /// \notes See GetNextInput for similar function that has built-in message handling
+  /// \param p_buffer - The shared pointer for the fetched buffer to return (by reference)
+  /// \param worker_id - The worker id
+  /// \return Status - The error code return
  virtual Status GetNextBuffer(std::unique_ptr<DataBuffer> *p_buffer, int32_t worker_id) {
    return GetNextBuffer(p_buffer, worker_id, false);
  }

-  // Gets the next buffer from the given child
-  // @notes See GetNextInput for similar function that has built-in message handling
-  // @param p_buffer - The shared pointer for the fetched buffer to return (by reference)
-  // @return Status - The error code return
+  /// \brief Gets the next buffer from the given child
+  /// \notes See GetNextInput for similar function that has built-in message handling
+  /// \param p_buffer - The shared pointer for the fetched buffer to return (by reference)
+  /// \return Status - The error code return
  virtual Status GetNextBuffer(std::unique_ptr<DataBuffer> *p_buffer) { return GetNextBuffer(p_buffer, 0, false); }

-  // Gets the next buffer from the given child
-  // @notes See GetNextInput for similar function that has built-in message handling
-  // @param p_buffer - The shared pointer for the fetched buffer to return (by reference)
-  // @param worker_id - The worker id
-  // @param retry_if_eoe Set this flag to true to allow calling pop() again after the first pop() returns EOE.
-  // @return Status - The error code return
+  /// \brief Gets the next buffer from the given child
+  /// \notes See GetNextInput for similar function that has built-in message handling
+  /// \param p_buffer - The shared pointer for the fetched buffer to return (by reference)
+  /// \param worker_id - The worker id
+  /// \param retry_if_eoe Set this flag to true to allow calling pop() again after the first pop() returns EOE.
+  /// \return Status - The error code return
  virtual Status GetNextBuffer(std::unique_ptr<DataBuffer> *p_buffer, int32_t worker_id, bool retry_if_eoe);

-  // Gets the next buffer from the given child .  This function also has built-in eoe and eof
-  // message handling so that child classes don't have to manually code pass-through logic when
-  // those messages are received.
-  // @param p_buffer - The shared pointer for the fetched buffer to return (by reference)
-  // @param worker_id - The worker id
-  // @return Status - The error code return
+  /// \brief Gets the next buffer from the given child .  This function also has built-in eoe and eof
+  /// message handling so that child classes don't have to manually code pass-through logic when
+  /// those messages are received.
+  /// \param p_buffer - The shared pointer for the fetched buffer to return (by reference)
+  /// \param worker_id - The worker id
+  /// \return Status - The error code return
  Status GetNextInput(std::unique_ptr<DataBuffer> *p_buffer, int32_t worker_id = 0, int32_t child_index = 0);

-  // Performs handling for when an eoe message is received.
-  // The base class implementation simply flows the eoe message to output. Derived classes
-  // may override if they need to perform special eoe handling.
-  // @param worker_id - The worker id
-  // @return Status - The error code return
+  /// \brief Performs handling for when an eoe message is received.
+  /// The base class implementation simply flows the eoe message to output. Derived classes
+  /// may override if they need to perform special eoe handling.
+  /// \param worker_id - The worker id
+  /// \return Status - The error code return
  virtual Status EoeReceived(int32_t worker_id);

-  // Performs handling for when an eof message is received.
-  // The base class implementation simply flows the eof message to output. Derived classes
-  // may override if they need to perform special eof handling.
-  // @param worker_id - The worker id
-  // @return Status - The error code return
+  /// \brief Performs handling for when an eof message is received.
+  /// The base class implementation simply flows the eof message to output. Derived classes
+  /// may override if they need to perform special eof handling.
+  /// \param worker_id - The worker id
+  /// \return Status - The error code return
  virtual Status EofReceived(int32_t worker_id);

-  // Derived classes may implement the reset function if the operator is stateful and needs
-  // specific reset handling that is not contained in this common code version of the reset
-  // @return Status - The error code return
+  /// \brief Derived classes may implement the reset function if the operator is stateful and needs
+  /// specific reset handling that is not contained in this common code version of the reset
+  /// \return Status - The error code return
  virtual Status Reset();

-  // This calls the reset function on this subtree in pre-order
-  // @return Status - The error code return
+  /// \brief This calls the reset function on this subtree in pre-order
+  /// \return Status - The error code return
  virtual Status ResetSubtree() {
    RETURN_IF_NOT_OK(Reset());
    for (const auto &c : child_) {
@ -164,64 +171,68 @@ class DatasetOp : public std::enable_shared_from_this<DatasetOp> {
    return Status::OK();
  }

-  // During tree prepare phase, operators may have specific pre-operations to perform depending on
-  // their role.
-  // @notes Derived versions of this function should always call it's superclass version first
-  // before providing their own implementations.
+  /// \brief During tree prepare phase, operators may have specific pre-operations to perform depending on
+  /// their role.
+  /// \notes Derived versions of this function should always call it's superclass version first
+  /// before providing their own implementations.
  virtual Status PrepareNodePreAction();

-  // During tree prepare phase, operators may have specific post-operations to perform depending on
-  // their role.
-  // @notes Derived versions of this function should always call it's superclass version first
-  // before providing their own implementations.
+  /// \brief During tree prepare phase, operators may have specific post-operations to perform depending on
+  /// their role.
+  /// \notes Derived versions of this function should always call it's superclass version first
+  /// before providing their own implementations.
  virtual Status PrepareNodePostAction();

-  // Getter function
-  // @return The operator id
+  /// \brief Getter function
+  /// \return The operator id
  int32_t id() const { return operator_id_; }

-  // Getter function
-  // @return The prepare flags
+  /// \brief Getter function
+  /// \return The prepare flags
  virtual uint32_t PrepareFlags() const;

-  // Getter function
-  // @return The number of workers in this op
+  /// \brief Getter function
+  /// \return The number of workers in this op
  virtual int32_t num_workers() const = 0;

-  // Getter function
-  // @return The number of threads consuming from previous op.
+  /// \brief Getter function
+  /// \return The number of threads consuming from previous op.
  virtual int32_t num_consumers() const = 0;

-  // Getter function
-  // @return The number of threads producing to the output connector.
+  /// \brief Getter function
+  /// \return The number of threads producing to the output connector.
  virtual int32_t num_producers() const = 0;

-  // Getter function
-  // @return T/F if this is an inlined operator
+  /// \brief Getter function
+  /// \return T/F if this is an inlined operator
  bool inlined() const { return (oc_queue_size_ == 0); }

-  // Setter function
-  // @return Sets the control flags
+  /// \brief Setter function
+  /// \return Sets the control flags
  void set_control_flag(uint64_t flag) { BitSet(&op_ctrl_flags_, flag); }

-  // Register the internal worker connectors. No op unless it is a parallel op
-  // @return Status
+  /// \brief Clears the given bits in the control flags
+  /// \param flag - The control flag bits to clear
+  void ClearControlFlag(uint64_t flag) { BitClear(&op_ctrl_flags_, flag); }
+
+  /// \brief Register the internal worker connectors. No op unless it is a parallel op
+  /// \return Status
  virtual Status RegisterWorkerConnectors() { return Status::OK(); }

-  // Getter for the column name mapping
-  // @return The returned map
+  /// \brief Getter for the column name mapping
+  /// \return The returned map
  std::unordered_map<std::string, int32_t> column_name_id_map() const { return column_name_id_map_; }

-  // Checks if the column name map has been set up yet for this op
-  // @return - T/F if the operator has the map set up
+  /// \brief Checks if the column name map has been set up yet for this op
+  /// \return - true if the column name map of this op is non-empty, false otherwise
-  bool HasColumnNameMap() const { return (column_name_id_map_.empty()); }
+  bool HasColumnNameMap() const { return !column_name_id_map_.empty(); }

-  // gives a string output for the column map for handy debug printing
-  // @return - the column name map as a string
+  /// \brief gives a string output for the column map for handy debug printing
+  /// \return - the column name map as a string
  std::string ColumnNameMapAsString() const;

-  // Getter function
-  // @return connector size of current op
+  /// \brief Getter function
+  /// \return connector size of current op
  int32_t ConnectorSize() const {
    if (!inlined()) {
      return out_connector_->size();
@ -230,8 +241,13 @@ class DatasetOp : public std::enable_shared_from_this<DatasetOp> {
    return ChildOpConnectorSize();
  }

-  // Getter function
-  // @return connector size of current op
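+  /// \brief Counts the number of buffers sent out by this op's output connector
+  /// \return The connector's out-buffer count, or -1 if this op has no output connector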
+  int64_t ConnectorOutBufferCount() const {
+    return out_connector_ == nullptr ? int64_t(-1) : static_cast<int64_t>(out_connector_->out_buffers_count());
+  }
+
+  /// \brief Getter function
+  /// \return connector capacity of current op
  int32_t ConnectorCapacity() const {
    if (!inlined()) {
      return out_connector_->capacity();
@ -240,51 +256,84 @@ class DatasetOp : public std::enable_shared_from_this<DatasetOp> {
    return ChildOpConnectorCapacity();
  }

-  // Getter function
-  // @return connector size of child op
+  /// \brief Getter function
+  /// \return connector size of child op
  int32_t ChildOpConnectorSize(int32_t child_index = 0) const { return child_[child_index]->ConnectorSize(); }

-  // Getter function
-  // @return connector capacity of child op
+  /// \brief Getter function
+  /// \return connector capacity of child op
  int32_t ChildOpConnectorCapacity(int32_t child_index = 0) const { return child_[child_index]->ConnectorCapacity(); }

-  // Children Getter
-  // @return Vector of Children
+  /// \brief Children Getter
+  /// \return Vector of Children
  std::vector<std::shared_ptr<DatasetOp>> Children() const { return child_; }

-  // Base method for NodePass visit.
-  // Subclass needs to override this if it requires special node visit access.
-  // Check "dataset/engine/opt/pass.h" for more details.
-  // @return Statue of the node visit
+  /// \brief Base method for NodePass pre-visit.  A tree walk consists of walking down the tree and also walking back up
+  ///     in a depth-first order.  PreAccept is the node visit on the way down, whereas the regular Accept is the main
+  ///     visit on the way back up the tree during a post-order traversal. Subclass needs to override this if it
+  ///     requires special node visit access. Check "dataset/engine/opt/pass.h" for more details.
+  /// \param[in] p The pass that visits this node
+  /// \param[out] modified Indicator if the node was modified
+  /// \return Status of the node visit
+  virtual Status PreAccept(NodePass *p, bool *modified);
+
+  /// \brief Base method for NodePass visit. Subclass needs to override this if it requires special node visit access.
+  ///     Check "dataset/engine/opt/pass.h" for more details.
+  /// \param[in] p The pass that visits this node
+  /// \param[out] modified Indicator if the node was modified
+  /// \return Status of the node visit
  virtual Status Accept(NodePass *p, bool *modified);

-  // Op name getter
-  // @return Name of the current Op
+  /// Op name getter
+  /// \return Name of the current Op
  virtual std::string Name() const { return "DatasetOp"; }

-  // Execution Tree getter
-  // @return Pointer to the ExecutionTree the current op belongs to, no ownership
+  /// Execution Tree getter
+  /// \return Pointer to the ExecutionTree the current op belongs to, no ownership
  ExecutionTree *Tree() { return tree_; }

+  /// Getter for the sampler
+  /// \return Shared pointer to the sampler (may return nullptr)
+  std::shared_ptr<Sampler> sampler() { return sampler_; }
+
+  /// Computes a CRC value for the operator
+  static uint32_t GenerateCRC(const std::shared_ptr<DatasetOp> &op);
+
+  /// \brief A helper templated function for casting "this" pointer to shared_ptr<derived>
+  ///     Similar to shared_from_this, except this one will give you the derived class as shared_ptr
+  /// \return A shared_ptr casted to the derived class
+  template <typename Derived>
+  std::shared_ptr<Derived> shared_from_base() {
+    return std::static_pointer_cast<Derived>(shared_from_this());
+  }
+
 protected:
-  // Adds a parent operator to this operator
-  // @notes External callers do not have access to this function.
-  // @param parent - The parent node to add
+  /// Adds a parent operator to this operator
+  /// \notes External callers do not have access to this function.
+  /// \param parent - The parent node to add
  void AddParent(DatasetOp *parent);

-  // Removes a parent operator from this operator
-  // @notes External callers do not have access to this function.
-  // @param parent - The parent node to remove
-  void RemoveParent(DatasetOp *parent);
+  /// Removes a parent operator from this operator
+  /// \notes External callers do not have access to this function.
+  /// \param parent - The parent node to remove
+  void RemoveParent(const DatasetOp *parent);

-  // A helper function for providing an assignment of the column name map.
-  // This grabs the map from child 0 and assigns it into this op.
-  // Can only be used if number of children is 1.
-  // @return - Status
-  Status AssignColMapFromChild();
+  /// Compute the current op's column map using its child's column map.
+  /// Get called during the tree post-prepare phase in PrepareNodePostAction.
+  /// This base implementation just inherits the map from child 0, and can only be used if the number of children is 1.
+  /// Operations changing the column map it inherits from the child must override this function.
+  /// \return - Status
+  virtual Status ComputeColMap();
+
+  /// A helper function with some common code that leaf nodes can use during
+  /// prepare phase for checking if they need to assign a sampler to the cache.
+  /// \param random_access_op - indicate if this is a mappable random access leaf or not
+  /// \return - Status
+  Status SaveSamplerForCache(bool random_access_op);

  std::vector<std::shared_ptr<DatasetOp>> child_;                // Child nodes
  std::vector<DatasetOp *> parent_;                              // Parent nodes. No ownership
+  std::shared_ptr<Sampler> sampler_;                             // Some leaf ops might have a sampler
  int32_t oc_queue_size_;                                        // Capacity for each out_connector_
  int32_t operator_id_;                                          // Generated id for the node
  ExecutionTree *tree_;                                          // Back pointer to our tree.
@ -292,18 +341,17 @@ class DatasetOp : public std::enable_shared_from_this<DatasetOp> {
  uint32_t op_ctrl_flags_;                                       // Flags for the operator
  std::unique_ptr<DbConnector> out_connector_;                   // Output Connector
  std::unordered_map<std::string, int32_t> column_name_id_map_;  // Mapping between col index and col name
-  bool first_fetch_;                                             // For use when setting column map
  std::mutex column_name_map_mutex_;                             // For protecting shared access to the column map

 private:
-  // Sets the operator id.
-  // @notes No public interface.  Only the class itself, or it's friend the execution tree can set
-  // this
-  // @param op_id - the Id value to set into the operator
+  /// Sets the operator id.
+  /// \notes No public interface.  Only the class itself, or it's friend the execution tree can set
+  /// this
+  /// \param op_id - the Id value to set into the operator
  void set_id(int32_t op_id) { operator_id_ = op_id; }

-  // Sets the tree into the op so that the operator has a back pointer to the tree.
-  // @param tree - the tree to assign to the op.
+  /// Sets the tree into the op so that the operator has a back pointer to the tree.
+  /// \param tree - the tree to assign to the op.
  void set_tree(ExecutionTree *tree) { tree_ = tree; }
 };
 }  // namespace dataset
--- a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc
@ -313,7 +313,7 @@ void DeviceQueueOp::Print(std::ostream &out, bool show_all) const {
 // Visitor accept method for NodePass
 Status DeviceQueueOp::Accept(NodePass *p, bool *modified) {
  // Downcast shared pointer then call visitor
-  return p->RunOnNode(std::static_pointer_cast<DeviceQueueOp>(shared_from_this()), modified);
+  return p->RunOnNode(shared_from_base<DeviceQueueOp>(), modified);
 }

 }  // namespace dataset
--- a/mindspore/ccsrc/dataset/engine/datasetops/filter_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/filter_op.cc
@ -126,9 +126,6 @@ Status FilterOp::WorkerEntry(int32_t worker_id) {
      continue;
    }

-    // Now that the first fetch is in, use the helper function to assign the column name map to this op.
-    RETURN_IF_NOT_OK(DatasetOp::AssignColMapFromChild());
-
    RETURN_IF_NOT_OK(CheckColumns(in_buffer.get(), &in_columns_));

    // if the databuffer was all filtered, it is marked as kFilterEmpty.
@ -264,7 +261,7 @@ Status FilterOp::InvokePredicateFunc(const TensorRow &input, bool *out_predicate
 // Visitor accept method for NodePass
 Status FilterOp::Accept(NodePass *p, bool *modified) {
  // Downcast shared pointer then call visitor
-  return p->RunOnNode(std::static_pointer_cast<FilterOp>(shared_from_this()), modified);
+  return p->RunOnNode(shared_from_base<FilterOp>(), modified);
 }
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc
@ -54,20 +54,19 @@ Status MapOp::Builder::sanityCheck() const {
 Status MapOp::Builder::Build(std::shared_ptr<MapOp> *ptr) {
  RETURN_IF_NOT_OK(sanityCheck());
  *ptr = std::make_shared<MapOp>(std::move(build_in_col_names_), std::move(build_out_col_names_),
-                                 std::move(build_tensor_funcs_), std::move(build_col_order_), build_num_workers_,
-                                 build_op_connector_size_, build_perf_mode_);
+                                 std::move(build_tensor_funcs_), build_num_workers_, build_op_connector_size_,
+                                 build_perf_mode_);
  return Status::OK();
 }

 // Constructor of MapOp
 MapOp::MapOp(const std::vector<std::string> &in_col_names, const std::vector<std::string> &out_col_names,
-             std::vector<std::shared_ptr<TensorOp>> tensor_funcs, const std::vector<std::string> &columns_order,
-             int32_t num_workers, int32_t op_connector_size, bool perf_mode)
+             std::vector<std::shared_ptr<TensorOp>> tensor_funcs, int32_t num_workers, int32_t op_connector_size,
+             bool perf_mode)
    : ParallelOp(num_workers, op_connector_size),
      tfuncs_(std::move(tensor_funcs)),
      in_columns_(in_col_names),
      out_columns_(out_col_names),
-      columns_order_(columns_order),
      perf_mode_(perf_mode) {
  // If caller didn't specify the out_col_names, assume they are same as the in_columns.
  if (out_columns_.empty() || out_columns_[0].empty()) {
@ -101,7 +100,7 @@ void MapOp::Print(std::ostream &out, bool show_all) const {
    }
    out << "\n  TensorOps:";
    for (size_t i = 0; i < tfuncs_.size(); i++) {
-      out << " " << tfuncs_[i];
+      out << " " << *(tfuncs_[i].get());
    }
    out << "\n\n";
  }
@ -156,14 +155,15 @@ Status MapOp::WorkerEntry(int32_t worker_id) {
  // initializations that happen after the first fetch.
  RETURN_IF_NOT_OK(FetchNextBuffer(&in_buffer, worker_id));

-  // Initialize details related to column selections and column map by calling WorkerEntryInit.
-  // WorkerEntryInit contains thread-safe lock to ensure that this init work is only performed once
-  // by the first worker to enter the codepath. All other threads will share the const info that
-  // gets set up here going forward.
+  // Sanity check the databuffer.
  // Special case: if there's more threads than buffers, some threads simply get the final control
-  // messages (eoe/eof), and so they will not perform the init work.
+  // messages (eoe/eof), and so they will not perform the check.
  if (!in_buffer->eoe() && !in_buffer->eof()) {
-    RETURN_IF_NOT_OK(WorkerEntryInit(in_buffer.get()));
+    int32_t num_rows = in_buffer->NumRows();
+    int32_t num_cols = in_buffer->NumCols();
+    if (num_rows == 0 || num_cols == 0) {
+      RETURN_STATUS_UNEXPECTED("MapOp is getting an empty DataBuffer.");
+    }
  }

  // Now that init work is done, drop into the main fetching loop.
@ -258,63 +258,18 @@ Status MapOp::WorkerCompute(DataBuffer *in_buffer, TensorQTable *new_tensor_tabl
  return Status::OK();
 }

-// initialize some internal data structure used by WorkerEntry()
-Status MapOp::WorkerEntryInit(const DataBuffer *in_buf) {
-  int32_t num_rows = in_buf->NumRows();
-  int32_t num_cols = in_buf->NumCols();
-  if (num_rows == 0 || num_cols == 0) {
-    RETURN_STATUS_UNEXPECTED("MapOp is getting an empty DataBuffer.");
+Status MapOp::ComputeColMap() {
+  // If the map has not been set up yet in the base class, then set it up
+  if (column_name_id_map_.empty()) {
+    std::unordered_map<std::string, int32_t> current_name_id_map = child_[0]->column_name_id_map();
+    // Initialize private variables
+    RETURN_IF_NOT_OK(InitPrivateVariable(&current_name_id_map));
+    // Create the final column name to index mapping in the base class field
+    CreateFinalColMap(&current_name_id_map);
+    MS_LOG(DEBUG) << "Column name map for map op set: " << this->ColumnNameMapAsString();
+  } else {
+    MS_LOG(WARNING) << "Column name map is already set!";
  }
-
-  // We can't use AssignColMapFromChild() here since we need to modify the column map. We need to be threadsafe
-  // though for saving the final map in the op, so use the lock here.
-  if (first_fetch_) {
-    std::unique_lock<std::mutex> lock(column_name_map_mutex_);
-    // If the map has not been set up yet in the base class, then we are the first one in to set it up
-    // (and we are under protection of the mutex lock)
-    if (column_name_id_map_.empty()) {
-      std::unordered_map<std::string, int32_t> current_name_id_map = child_[0]->column_name_id_map();
-
-      // If input_columns is empty(), The col at index-0 will be picked.
-      if (in_columns_.empty()) {
-        for (const auto &pair : current_name_id_map) {
-          if (pair.second == 0) {
-            MS_LOG(INFO) << "Input columns empty for map op, will apply to the first column in the current table.";
-            in_columns_.push_back(pair.first);
-            break;
-          }
-        }
-
-        // If caller didn't specify the out_col_names, assume they are same as the input_columns.
-        // This was done in the constructor, but if input columns was empty to start we have to redo it here.
-        if (out_columns_.empty() || out_columns_[0].empty()) {
-          out_columns_ = in_columns_;
-        }
-      }
-
-      // Before we continue, issue a sanity check to make sure the input columns from user and the incoming
-      // columns from child are correct
-      RETURN_IF_NOT_OK(this->ValidateInColumns(current_name_id_map));
-
-      // initialize keep_input_columns, true means to keep the column.
-      keep_input_columns_.resize(num_cols, true);
-      for (const auto &col_name : in_columns_) {
-        int32_t missed = current_name_id_map[col_name];
-        keep_input_columns_[missed] = false;
-      }
-
-      // initialize to_process_indices.
-      for (const auto &col_name : in_columns_) {
-        to_process_indices_.push_back(current_name_id_map[col_name]);
-      }
-
-      // Create the final column name to index mapping in the base class field
-      CreateFinalColMap(&current_name_id_map);
-      first_fetch_ = false;
-    }
-  }  // mutex lock will release here
-
-  MS_LOG(DEBUG) << "Column name map for map op set: " << this->ColumnNameMapAsString();
  return Status::OK();
 }

@ -330,6 +285,42 @@ Status MapOp::ValidateInColumns(const std::unordered_map<std::string, int32_t> &
  return Status::OK();
 }

+Status MapOp::InitPrivateVariable(std::unordered_map<std::string, int32_t> *col_name_id_map) {
+  // If input_columns is empty(), The col at index-0 will be picked.
+  if (in_columns_.empty()) {
+    for (const auto &pair : *col_name_id_map) {
+      if (pair.second == 0) {
+        MS_LOG(INFO) << "Input columns empty for map op, will apply to the first column in the current table.";
+        in_columns_.push_back(pair.first);
+        break;
+      }
+    }
+
+    // If caller didn't specify the out_col_names, assume they are same as the input_columns.
+    // This was done in the constructor, but if input columns was empty to start we have to redo it here.
+    if (out_columns_.empty() || out_columns_[0].empty()) {
+      out_columns_ = in_columns_;
+    }
+  }
+
+  // Before we continue, issue a sanity check to make sure the input columns from user and the incoming
+  // columns from child are correct
+  RETURN_IF_NOT_OK(this->ValidateInColumns(*col_name_id_map));
+
+  // initialize keep_input_columns, true means to keep the column.
+  keep_input_columns_.resize(col_name_id_map->size(), true);
+  for (const auto &col_name : in_columns_) {
+    int32_t missed = (*col_name_id_map)[col_name];
+    keep_input_columns_[missed] = false;
+  }
+
+  // initialize to_process_indices.
+  for (const auto &col_name : in_columns_) {
+    to_process_indices_.push_back((*col_name_id_map)[col_name]);
+  }
+  return Status::OK();
+}
+
 // Create the final column name to index mapping and get indices of the columns this mapop does not use.
 void MapOp::CreateFinalColMap(std::unordered_map<std::string, int32_t> *col_name_id_map) {
  std::unordered_map<std::string, int32_t> final_col_name_id_map;
@ -376,7 +367,7 @@ void MapOp::CreateFinalColMap(std::unordered_map<std::string, int32_t> *col_name
 // Visitor accept method for NodePass
 Status MapOp::Accept(NodePass *p, bool *modified) {
  // Downcast shared pointer then call visitor
-  return p->RunOnNode(std::static_pointer_cast<MapOp>(shared_from_this()), modified);
+  return p->RunOnNode(shared_from_base<MapOp>(), modified);
 }
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/datasetops/map_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/map_op.h
@ -93,13 +93,6 @@ class MapOp : public ParallelOp {
      return *this;
    }

-    // Setter method.
-    // @return Builder setter method returns reference to the builder.
-    Builder &SetColOrder(const std::vector<std::string> &col_order_) {
-      build_col_order_ = col_order_;
-      return *this;
-    }
-
    // Setter method.
    // @return Builder setter method returns reference to the builder.
    Builder &SetNumWorkers(int32_t num_workers) {
@ -130,7 +123,6 @@ class MapOp : public ParallelOp {
    std::vector<std::string> build_in_col_names_;
    std::vector<std::string> build_out_col_names_;
    std::vector<std::shared_ptr<TensorOp>> build_tensor_funcs_;
-    std::vector<std::string> build_col_order_;
    int32_t build_num_workers_;
    int32_t build_op_connector_size_;
    bool build_perf_mode_;  // Default true.
@ -145,12 +137,11 @@ class MapOp : public ParallelOp {
  // @param in_col_names A list of input column names (should match the input/output \p tensorFuncs).
  // @param out_col_names A list of output column names (should match the input/output \p tensorFuncs).
  // @param tensor_funcs A list of TensorOp pointers for MapOp to apply to each data.
-  // @param columns_order names A full list of column names (should match the whole dataset view post \p tensorFuncs).
  // @param num_workers The number of worker threads.
  // @param op_connector_size The size of each queue in the connector.
  MapOp(const std::vector<std::string> &in_col_names, const std::vector<std::string> &out_col_names,
-        std::vector<std::shared_ptr<TensorOp>> tensor_funcs, const std::vector<std::string> &columns_order,
-        int32_t num_workers, int32_t op_connector_size, bool perf_mode);
+        std::vector<std::shared_ptr<TensorOp>> tensor_funcs, int32_t num_workers, int32_t op_connector_size,
+        bool perf_mode);

  // Destructor
  ~MapOp() = default;
@ -190,10 +181,6 @@ class MapOp : public ParallelOp {
  // @return Name of the current Op
  std::string Name() const override { return "MapOp"; }

-  // Columns order getter
-  // @return The post map columns order
-  std::vector<std::string> const &ColumnsOrder() const { return columns_order_; }
-
 private:
  // Local queues where worker threads can pop from.
  // Popping directly from the Connector can block if the previous designated threads haven't pop.
@ -215,9 +202,6 @@ class MapOp : public ParallelOp {
  // Indices of the columns to process.
  std::vector<size_t> to_process_indices_;

-  // Variable to store the column_order of all columns post tensorOps
-  std::vector<std::string> columns_order_;
-
  // Performance mode is when the main thread creates local queues, pulls databuffers from the previous
  // op's Connector and distributes them to the local queues. Workers pull from the local queues.
  // If this flag is false, each worker pulls directly from the Connector. This use less resources
@ -258,15 +242,18 @@ class MapOp : public ParallelOp {
  // @param col_name_id_map The column name to index mapping obtained from child operator
  void CreateFinalColMap(std::unordered_map<std::string, int32_t> *col_name_id_map);

-  // Private function that initialize some internal data structure used by WorkerEntry()
-  // @param in_buf A raw pointer to the DataBuffer. A raw pointer is fine because this function does not manage memory
-  //     and is not shared with other threads.
-  Status WorkerEntryInit(const DataBuffer *in_buf);
-
  // Validating if each of the input_columns exists in the DataBuffer.
  // @param - the column map to check
  // @return - status return code
  Status ValidateInColumns(const std::unordered_map<std::string, int32_t> &col_name_id_map);
+
+  // Private function for computing the assignment of the column name map.
+  // @return - Status
+  Status ComputeColMap() override;
+
+  // Private function for initializing private variables such as in_columns_, out_columns_.
+  // @return - Status
+  Status InitPrivateVariable(std::unordered_map<std::string, int32_t> *col_name_id_map);
 };
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc
@ -26,8 +26,8 @@
 namespace mindspore {
 namespace dataset {
 // Constructor
-ParallelOp::ParallelOp(int32_t num_workers, int32_t op_connector_size)
-    : DatasetOp(op_connector_size),
+ParallelOp::ParallelOp(int32_t num_workers, int32_t op_connector_size, std::shared_ptr<Sampler> sampler)
+    : DatasetOp(op_connector_size, sampler),
      num_workers_(num_workers),
      num_producers_(num_workers),
      worker_connector_size_(1),
--- a/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.h
@ -38,7 +38,8 @@ class ParallelOp : public DatasetOp {
  // Constructor
  // @param num_workers
  // @param op_connector_size - size of the output connector for this operator
-  ParallelOp(int32_t num_workers, int32_t op_connector_size);
+  // @param sampler - The sampler for the op
+  ParallelOp(int32_t num_workers, int32_t op_connector_size, std::shared_ptr<Sampler> sampler = nullptr);

  // Destructor
  ~ParallelOp() = default;
--- a/mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.cc
@ -20,7 +20,8 @@
 namespace mindspore {
 namespace dataset {
 // Constructor
-PipelineOp::PipelineOp(int32_t op_connector_size) : DatasetOp(op_connector_size) {}
+PipelineOp::PipelineOp(int32_t op_connector_size, std::shared_ptr<Sampler> sampler)
+    : DatasetOp(op_connector_size, sampler) {}

 // A print method typically used for debugging
 void PipelineOp::Print(std::ostream &out, bool show_all) const {
--- a/mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.h
@ -32,7 +32,8 @@ class PipelineOp : public DatasetOp {
  // Constructor
  // @param op_connector_size - size of the output connector
  // @return Builder setter method returns reference to the builder.
-  explicit PipelineOp(int32_t op_connector_size);
+  // @param sampler - The sampler for the op
+  explicit PipelineOp(int32_t op_connector_size, std::shared_ptr<Sampler> sampler = nullptr);

  // Destructor
  ~PipelineOp() = default;
--- a/mindspore/ccsrc/dataset/engine/datasetops/project_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/project_op.cc
@ -74,24 +74,6 @@ void ProjectOp::Print(std::ostream &out, bool show_all) const {
 Status ProjectOp::GetNextBuffer(std::unique_ptr<DataBuffer> *p_buffer, int32_t worker_id, bool retry_if_eoe) {
  RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(p_buffer, worker_id, retry_if_eoe));
  if (!((*p_buffer)->eoe()) && !((*p_buffer)->eof())) {
-    // Only for the first buffer fetched, get the column map of the incoming data and save it
-    // into our own column name map after making the appropriate mods
-    // We cannot use the super class AssignColMapFromChild here because we're making a modification of the
-    // map from the child map.
-    if (first_fetch_) {
-      std::unordered_map<std::string, int32_t> child_column_name_mapping = child_[0]->column_name_id_map();
-      for (size_t i = 0; i < columns_to_project_.size(); i++) {
-        std::string &current_column = columns_to_project_[i];
-        if (child_column_name_mapping.find(current_column) == child_column_name_mapping.end()) {
-          std::string err_msg = "ProjectOp: column " + current_column + " does not exist in child operator.";
-          RETURN_STATUS_UNEXPECTED(err_msg);
-        }
-        // Setup the new column name mapping for ourself (base class field)
-        column_name_id_map_[current_column] = i;
-        projected_column_indices_.push_back(child_column_name_mapping[current_column]);
-      }
-      first_fetch_ = false;  // we only need to do this path once
-    }
    RETURN_IF_NOT_OK(Project(p_buffer));
  }
  return Status::OK();
@ -149,7 +131,29 @@ Status ProjectOp::EofReceived(int32_t worker_id) { return Status::OK(); }
 // Visitor accept method for NodePass
 Status ProjectOp::Accept(NodePass *p, bool *modified) {
  // Downcast shared pointer then call visitor
-  return p->RunOnNode(std::static_pointer_cast<ProjectOp>(shared_from_this()), modified);
+  return p->RunOnNode(shared_from_base<ProjectOp>(), modified);
+}
+
+// Compute the column map and save it into our own column name map
+// We cannot use the super class ComputeColMap here because we're making a modification of the
+// map from the child map.
+Status ProjectOp::ComputeColMap() {
+  if (column_name_id_map_.empty()) {
+    std::unordered_map<std::string, int32_t> child_column_name_mapping = child_[0]->column_name_id_map();
+    for (size_t i = 0; i < columns_to_project_.size(); i++) {
+      std::string &current_column = columns_to_project_[i];
+      if (child_column_name_mapping.find(current_column) == child_column_name_mapping.end()) {
+        std::string err_msg = "ProjectOp: column " + current_column + " does not exist in child operator.";
+        RETURN_STATUS_UNEXPECTED(err_msg);
+      }
+      // Setup the new column name mapping for ourself (base class field)
+      column_name_id_map_[current_column] = i;
+      projected_column_indices_.push_back(child_column_name_mapping[current_column]);
+    }
+  } else {
+    MS_LOG(WARNING) << "Column name map is already set!";
+  }
+  return Status::OK();
 }
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/datasetops/project_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/project_op.h
@ -116,6 +116,10 @@ class ProjectOp : public PipelineOp {
  std::vector<int32_t> projected_column_indices_;

  Status Project(std::unique_ptr<DataBuffer> *data_buffer);
+
+  // Computing the assignment of the column name map.
+  // @return - Status
+  Status ComputeColMap() override;
 };
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc
@ -69,12 +69,6 @@ Status RenameOp::operator()() {
    RETURN_STATUS_UNEXPECTED(err_msg);
  }

-  // First, populate the column map from the input child.
-  // This will not be the final map for output from this op.
-  RETURN_IF_NOT_OK(DatasetOp::AssignColMapFromChild());
-  // core rename functionality only needs to happen once, to identify the new column names/indexes
-  RETURN_IF_NOT_OK(RenameColumns());
-
  while (curr_buffer->eof() == false) {
    while (curr_buffer->eoe() == false) {
      // push the renamed input buffer
@ -95,45 +89,52 @@ Status RenameOp::operator()() {
  return Status::OK();
 }

-// renames the columns
-Status RenameOp::RenameColumns() {
-  // iterate over my index in input vector, find the corresponding position
-  std::unordered_map<std::string, int32_t> new_col_name_id_map = {};
-  // parameter for input check
-  size_t found = 0;
+// Rename core functionality to compute the new column name id map.
+// We need to override the super class ComputeColMap here because we're making a modification of the
+// map from the child map.
+Status RenameOp::ComputeColMap() {
+  if (column_name_id_map_.empty()) {
+    column_name_id_map_ = child_[0]->column_name_id_map();
+    // iterate over my index in input vector, find the corresponding position
+    std::unordered_map<std::string, int32_t> new_col_name_id_map = {};
+    // parameter for input check
+    size_t found = 0;

-  // iterate over all the pairs and if there is a name match with rename, rename the column and add it to new map
-  // by doing it this way we recreate a new ColNameIdMap and allow for switching
-  for (const auto &pair : column_name_id_map_) {
-    std::string name = pair.first;
-    int32_t id = pair.second;
-    // find name
-    std::vector<std::string>::iterator it;
-    it = std::find(in_columns_.begin(), in_columns_.end(), name);
-    // for c input checks here we have to count the number of times we find the stuff in in_columns_
-    // because we iterate over the mInputList n times
-    if (it != in_columns_.end()) {
-      // found
-      found += 1;
-      int index = std::distance(in_columns_.begin(), it);
-      MS_LOG(DEBUG) << "Rename operator index found " << index << " value " << id << ".";
+    // iterate over all the pairs and if there is a name match with rename, rename the column and add it to new map
+    // by doing it this way we recreate a new ColNameIdMap and allow for switching
+    for (const auto &pair : column_name_id_map_) {
+      std::string name = pair.first;
+      int32_t id = pair.second;
+      // find name
+      std::vector<std::string>::iterator it;
+      it = std::find(in_columns_.begin(), in_columns_.end(), name);
+      // for input validation, count how many child columns match a name in in_columns_; the total is
+      // compared against in_columns_.size() after the loop
+      if (it != in_columns_.end()) {
+        // found
+        found += 1;
+        int index = std::distance(in_columns_.begin(), it);
+        MS_LOG(DEBUG) << "Rename operator index found " << index << " value " << id << ".";

-      new_col_name_id_map[out_columns_[index]] = id;
-    } else {
-      // not found
-      MS_LOG(DEBUG) << "Rename operator index not found: " << id << " is the column id.";
-      new_col_name_id_map[name] = id;
+        new_col_name_id_map[out_columns_[index]] = id;
+      } else {
+        // not found
+        MS_LOG(DEBUG) << "Rename operator index not found: " << id << " is the column id.";
+        new_col_name_id_map[name] = id;
+      }
+    }
+    // Only checks how many renamed columns were found; this input check is not exhaustive
+    if (found != in_columns_.size()) {
+      MS_LOG(DEBUG) << "Rename operator column names found: " << found << " out of " << in_columns_.size() << ".";
+      std::string err_msg = "Renamed column doesn't exist in dataset";
+      RETURN_STATUS_UNEXPECTED(err_msg);
    }
-  }
-  // only checks number of renamed columns have been found, this input check doesn't check everything
-  if (found != in_columns_.size()) {
-    MS_LOG(DEBUG) << "Rename operator column names found: " << found << " out of " << in_columns_.size() << ".";
-    std::string err_msg = "Renamed column doesn't exist in dataset";
-    RETURN_STATUS_UNEXPECTED(err_msg);
-  }

-  // Now, overwrite our column map with the new renamed columns/id's
-  column_name_id_map_ = new_col_name_id_map;
+    // Now, overwrite our column map with the new renamed columns/id's
+    column_name_id_map_ = new_col_name_id_map;
+  } else {
+    MS_LOG(WARNING) << "Column name map is already set!";
+  }
  return Status::OK();
 }

@ -175,7 +176,7 @@ Status RenameOp::EoeReceived(int32_t) {
 // Visitor accept method for NodePass
 Status RenameOp::Accept(NodePass *p, bool *modified) {
  // Downcast shared pointer then call visitor
-  return p->RunOnNode(std::static_pointer_cast<RenameOp>(shared_from_this()), modified);
+  return p->RunOnNode(shared_from_base<RenameOp>(), modified);
 }
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/datasetops/rename_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/rename_op.h
@ -122,7 +122,9 @@ class RenameOp : public PipelineOp {

 protected:
  // Rename core functionality
-  Status RenameColumns();
+  // Computing the assignment of the new column name map.
+  // @return - Status
+  Status ComputeColMap() override;

  // Variable to store the input column names
  std::vector<std::string> in_columns_;
--- a/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.cc
@ -82,14 +82,14 @@ void RepeatOp::Print(std::ostream &out, bool show_all) const {
 Status RepeatOp::PrepareNodePostAction() {
  // Run any common code from super class first before adding our own specific logic
  RETURN_IF_NOT_OK(PipelineOp::PrepareNodePostAction());
-  std::shared_ptr<DatasetOp> leaf_op = tree_->PopFromRepeatStack();
+  std::shared_ptr<DatasetOp> leaf_op = tree_->PopFromEOEOpStack();
  while (leaf_op != nullptr) {
    // Track the leaf operators that are under this repeat op.
    eoe_ops_.push_back(leaf_op);
-    leaf_op = tree_->PopFromRepeatStack();
+    leaf_op = tree_->PopFromEOEOpStack();
  }
  // Push ourselves to the stack in case one of our ascendants is repeat too.
-  tree_->AddToRepeatStack(shared_from_this());
+  tree_->AddToEOEOpStack(shared_from_this());
  return Status::OK();
 }

@ -123,8 +123,6 @@ Status RepeatOp::GetNextBuffer(std::unique_ptr<DataBuffer> *p_buffer, int32_t wo
  if (buf->eof()) {
    RETURN_IF_NOT_OK(EofReceived(worker_id));
  }
-  // Update the column name map if needed
-  RETURN_IF_NOT_OK(DatasetOp::AssignColMapFromChild());
  *p_buffer = std::move(buf);
  return Status::OK();
 }
@ -192,7 +190,7 @@ int32_t RepeatOp::num_producers() const {
 // Visitor accept method for NodePass
 Status RepeatOp::Accept(NodePass *p, bool *modified) {
  // Downcast shared pointer then call visitor
-  return p->RunOnNode(std::static_pointer_cast<RepeatOp>(shared_from_this()), modified);
+  return p->RunOnNode(shared_from_base<RepeatOp>(), modified);
 }
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc
@ -266,9 +266,6 @@ Status ShuffleOp::InitShuffleBuffer() {
    RETURN_STATUS_UNEXPECTED("Unable to fetch a single row for shuffle buffer.");
  }

-  // Now that a first fetch is done, assign the column map for this operator
-  RETURN_IF_NOT_OK(DatasetOp::AssignColMapFromChild());
-
  // Now fill the rest of the shuffle buffer until we are unable to get the next row or we reached
  // the desired shuffle buffer size.
  while (!new_row.empty() && shuffle_buffer_->size() < static_cast<size_t>(shuffle_size_ - 1)) {
@ -301,7 +298,7 @@ Status ShuffleOp::EoeReceived(int32_t worker_id) {
 // Visitor accept method for NodePass
 Status ShuffleOp::Accept(NodePass *p, bool *modified) {
  // Downcast shared pointer then call visitor
-  return p->RunOnNode(std::static_pointer_cast<ShuffleOp>(shared_from_this()), modified);
+  return p->RunOnNode(shared_from_base<ShuffleOp>(), modified);
 }
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/datasetops/skip_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/skip_op.cc
@ -86,9 +86,6 @@ Status SkipOp::operator()() {
  std::unique_ptr<DataBuffer> curr_buffer;
  RETURN_IF_NOT_OK(GetNextInput(&curr_buffer));

-  // After the first buffer fetch above we can do the one-time assign of the column name map
-  RETURN_IF_NOT_OK(DatasetOp::AssignColMapFromChild());
-
  while (curr_buffer->eof() == false) {
    // Reset count
    skip_count_ = 0;
@ -133,7 +130,7 @@ Status SkipOp::EofReceived(int32_t worker_id) {
 // Visitor accept method for NodePass
 Status SkipOp::Accept(NodePass *p, bool *modified) {
  // Downcast shared pointer then call visitor
-  return p->RunOnNode(std::static_pointer_cast<SkipOp>(shared_from_this()), modified);
+  return p->RunOnNode(shared_from_base<SkipOp>(), modified);
 }
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc
@ -70,20 +70,14 @@ Status CelebAOp::Builder::SanityCheck() {
 CelebAOp::CelebAOp(int32_t num_workers, int32_t rows_per_buffer, const std::string &dir, int32_t queue_size,
                   bool decode, const std::string &dataset_type, const std::set<std::string> &exts,
                   std::unique_ptr<DataSchema> schema, std::shared_ptr<Sampler> sampler)
-    : ParallelOp(num_workers, queue_size),
+    : ParallelOp(num_workers, queue_size, std::move(sampler)),
      rows_per_buffer_(rows_per_buffer),
      folder_path_(dir),
      decode_(decode),
      extensions_(exts),
      data_schema_(std::move(schema)),
-      sampler_(std::move(sampler)),
      num_rows_in_attr_file_(0),
      dataset_type_(dataset_type) {
-  // Set the column name map (base class field)
-  for (int32_t index = 0; index < data_schema_->NumColumns(); index++) {
-    column_name_id_map_[data_schema_->column(index).name()] = index;
-  }
-
  attr_info_queue_ = std::make_unique<Queue<std::vector<std::string>>>(queue_size);
  io_block_queues_.Init(num_workers_, queue_size);
 }
@ -413,5 +407,17 @@ Status CelebAOp::Reset() {
  wp_.Set();  // wake up master thread after reset is done
  return Status::OK();
 }
+
+Status CelebAOp::ComputeColMap() {
+  // Set the column name map (base class field)
+  if (column_name_id_map_.empty()) {
+    for (int32_t index = 0; index < data_schema_->NumColumns(); index++) {
+      column_name_id_map_[data_schema_->column(index).name()] = index;
+    }
+  } else {
+    MS_LOG(WARNING) << "Column name map is already set!";
+  }
+  return Status::OK();
+}
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.h
@ -212,12 +212,15 @@ class CelebAOp : public ParallelOp, RandomAccessOp {
  // @return Status - The error code return
  Status Reset() override;

+  // Private function for computing the assignment of the column name map.
+  // @return - Status
+  Status ComputeColMap() override;
+
  int32_t rows_per_buffer_;
  std::string folder_path_;  // directory of celeba folder
  bool decode_;
  std::set<std::string> extensions_;  // extensions allowed
  std::unique_ptr<DataSchema> data_schema_;
-  std::shared_ptr<Sampler> sampler_;
  std::unique_ptr<Queue<std::vector<std::string>>> attr_info_queue_;
  int64_t num_rows_in_attr_file_;  // rows number specified in attr file
  QueueList<std::unique_ptr<IOBlock>> io_block_queues_;
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc
@ -79,18 +79,13 @@ Status CifarOp::Builder::SanityCheck() {

 CifarOp::CifarOp(CifarType type, int32_t num_works, int32_t rows_per_buf, const std::string &file_dir,
                 int32_t queue_size, std::unique_ptr<DataSchema> data_schema, std::shared_ptr<Sampler> sampler)
-    : ParallelOp(num_works, queue_size),
+    : ParallelOp(num_works, queue_size, std::move(sampler)),
      cifar_type_(type),
      rows_per_buffer_(rows_per_buf),
      folder_path_(file_dir),
      data_schema_(std::move(data_schema)),
-      sampler_(std::move(sampler)),
      row_cnt_(0),
      buf_cnt_(0) {
-  // set the column name map (base class field)
-  for (uint32_t i = 0; i < data_schema_->NumColumns(); ++i) {
-    column_name_id_map_[data_schema_->column(i).name()] = i;
-  }
  constexpr uint64_t kUtilQueueSize = 512;
  cifar_raw_data_block_ = std::make_unique<Queue<std::vector<unsigned char>>>(kUtilQueueSize);
  io_block_queues_.Init(num_workers_, queue_size);
@ -454,5 +449,17 @@ Status CifarOp::CountTotalRows(const std::string &dir, bool isCIFAR10, int64_t *
    return Status::OK();
  }
 }
+
+Status CifarOp::ComputeColMap() {
+  // Set the column name map (base class field), using an int32_t index like the other source ops
+  if (!column_name_id_map_.empty()) {
+    MS_LOG(WARNING) << "Column name map is already set!";
+    return Status::OK();
+  }
+  for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
+    column_name_id_map_[data_schema_->column(i).name()] = i;
+  }
+  return Status::OK();
+}
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.h
@ -208,11 +208,14 @@ class CifarOp : public ParallelOp, public RandomAccessOp {
  // @return Status - The error code return
  Status GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const override;

+  // Private function that maps each data schema column name to its index, if the map is still empty.
+  // @return Status - The error code return
+  Status ComputeColMap() override;
+
  CifarType cifar_type_;
  int32_t rows_per_buffer_;
  std::string folder_path_;
  std::unique_ptr<DataSchema> data_schema_;
-  std::shared_ptr<Sampler> sampler_;
  int64_t row_cnt_;
  int64_t buf_cnt_;

--- a/mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.cc
@ -31,11 +31,7 @@
 namespace mindspore {
 namespace dataset {
 ClueOp::Builder::Builder()
-    : builder_device_id_(0),
-      builder_num_devices_(1),
-      builder_num_samples_(0),
-      builder_shuffle_files_(false),
-      builder_shuffle_global_(false) {
+    : builder_device_id_(0), builder_num_devices_(1), builder_num_samples_(0), builder_shuffle_files_(false) {
  std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  builder_num_workers_ = config_manager->num_parallel_workers();
  builder_op_connector_size_ = config_manager->op_connector_size();
@ -66,8 +62,8 @@ Status ClueOp::Builder::Build(std::shared_ptr<ClueOp> *op) {

  std::shared_ptr<ClueOp> clue_op = std::make_shared<ClueOp>(
    builder_num_workers_, builder_rows_per_buffer_, builder_num_samples_, builder_worker_connector_size_, ck_map,
-    builder_clue_files_list_, builder_op_connector_size_, builder_shuffle_files_, builder_shuffle_global_,
-    builder_num_devices_, builder_device_id_);
+    builder_clue_files_list_, builder_op_connector_size_, builder_shuffle_files_, builder_num_devices_,
+    builder_device_id_);
  RETURN_IF_NOT_OK(clue_op->Init());
  *op = std::move(clue_op);

@ -87,7 +83,7 @@ std::vector<std::string> ClueOp::Builder::split(const std::string &s, char delim

 ClueOp::ClueOp(int32_t num_workers, int64_t rows_per_buffer, int64_t num_samples, int32_t worker_connector_size,
               ColKeyMap cols_to_keyword, std::vector<std::string> clue_files_list, int32_t op_connector_size,
-               bool shuffle_files, bool shuffle_global, int32_t num_device, int32_t device_id)
+               bool shuffle_files, int32_t num_device, int32_t device_id)
    : ParallelOp(num_workers, op_connector_size),
      rows_per_buffer_(rows_per_buffer),
      num_rows_per_shard_(0),
@ -98,7 +94,6 @@ ClueOp::ClueOp(int32_t num_workers, int64_t rows_per_buffer, int64_t num_samples
      load_jagged_connector_(true),
      cols_to_keyword_(cols_to_keyword),
      shuffle_files_(shuffle_files),
-      shuffle_global_(shuffle_global),
      finished_reading_dataset_(false),
      num_devices_(num_device),
      device_id_(device_id),
@ -112,13 +107,6 @@ Status ClueOp::Init() {
  int32_t safe_queue_size = static_cast<int32_t>(std::ceil(clue_files_list_.size() / num_workers_) + 1);
  io_block_queues_.Init(num_workers_, safe_queue_size);

-  // Set the column name mapping (base class field)
-  int count = 0;
-  for (auto &p : cols_to_keyword_) {
-    column_name_id_map_[p.first] = count;
-    count++;
-  }
-
  RETURN_IF_NOT_OK(ParallelOp::CreateWorkerConnector(worker_connector_size_));
  jagged_buffer_connector_ = std::make_unique<JaggedConnector>(num_workers_, 1, worker_connector_size_);

@ -549,5 +537,19 @@ Status ClueOp::CountAllFileRows(const std::vector<std::string> &files, int64_t *
  }
  return Status::OK();
 }
+
+Status ClueOp::ComputeColMap() {
+  // Give every key of cols_to_keyword_ a running id in the column name map (base class field)
+  if (!column_name_id_map_.empty()) {
+    MS_LOG(WARNING) << "Column name map is already set!";
+    return Status::OK();
+  }
+  int next_id = 0;
+  for (auto &entry : cols_to_keyword_) {
+    column_name_id_map_[entry.first] = next_id;
+    ++next_id;
+  }
+  return Status::OK();
+}
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.h
@ -104,13 +104,6 @@ class ClueOp : public ParallelOp {
      return *this;
    }

-    // Setter method.
-    // @return Builder - setter method returns reference to the builder.
-    Builder &SetShuffleGlobal(bool shuffle_global) {
-      builder_shuffle_global_ = shuffle_global;
-      return *this;
-    }
-
    // Setter method.
    // @return Builder - setter method returns reference to the builder.
    Builder &SetNumSamples(int64_t num_samples) {
@ -139,15 +132,13 @@ class ClueOp : public ParallelOp {
    int32_t builder_worker_connector_size_;
    std::vector<std::string> builder_clue_files_list_;
    bool builder_shuffle_files_;
-    bool builder_shuffle_global_;
    std::map<std::string, std::string> builder_cols_to_keyword_;
  };

  // Constructor of ClueOp
-  // @param shuffle_global - whether or not to shuffle the entire dataset.
  ClueOp(int32_t num_workers, int64_t rows_per_buffer, int64_t num_samples, int32_t worker_connector_size,
         ColKeyMap cols_to_keyword, std::vector<std::string> clue_files_list, int32_t op_connector_size,
-         bool shuffle_files, bool shuffle_global, int32_t num_devices, int32_t device_id);
+         bool shuffle_files, int32_t num_devices, int32_t device_id);

  // Default destructor
  ~ClueOp() = default;
@ -182,10 +173,6 @@ class ClueOp : public ParallelOp {
  // @return Vector of the input file names
  std::vector<std::string> FileNames() { return clue_files_list_; }

-  // Global shuffle flag getter
-  // @return Bool - whether this Op requires global shuffle
-  bool RequireGlobalShuffle() { return shuffle_global_; }
-
 private:
  // The entry point for when workers are launched.
  // @param worker_id - the id of the worker that is executing this function.
@ -263,9 +250,12 @@ class ClueOp : public ParallelOp {
  // @return Status - the error code returned.
  Status GetValue(const nlohmann::json &js, std::vector<std::string> key_chain, std::shared_ptr<Tensor> *t);

+  // Private function that assigns a running id to each key of cols_to_keyword_, if the map is still empty.
+  // @return Status - The error code return
+  Status ComputeColMap() override;
+
  int32_t device_id_;
  bool shuffle_files_;
-  bool shuffle_global_;
  bool finished_reading_dataset_;
  int32_t num_devices_;
  int64_t rows_per_buffer_;
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.cc
@ -129,10 +129,6 @@ CocoOp::CocoOp(const TaskType &task_type, const std::string &image_folder_path,
      rows_per_buffer_(rows_per_buffer),
      sampler_(std::move(sampler)),
      data_schema_(std::move(data_schema)) {
-  // Set the column name map (base class field)
-  for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
-    column_name_id_map_[data_schema_->column(i).name()] = i;
-  }
  io_block_queues_.Init(num_workers_, queue_size);
 }

@ -627,5 +623,17 @@ Status CocoOp::GetClassIndexing(const std::string &dir, const std::string &file,
  *output_class_indexing = op->label_index_;
  return Status::OK();
 }
+
+Status CocoOp::ComputeColMap() {
+  // Fill the column name map (base class field) from the schema, unless it already has entries
+  if (!column_name_id_map_.empty()) {
+    MS_LOG(WARNING) << "Column name map is already set!";
+    return Status::OK();
+  }
+  for (int32_t col_id = 0; col_id < data_schema_->NumColumns(); ++col_id) {
+    column_name_id_map_[data_schema_->column(col_id).name()] = col_id;
+  }
+  return Status::OK();
+}
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.h
@ -306,6 +306,10 @@ class CocoOp : public ParallelOp, public RandomAccessOp {
  template <typename T>
  Status SearchNodeInJson(nlohmann::json input_tree, std::string node_name, T *output_node);

+  // Private function that maps each data schema column name to its index, if the map is still empty.
+  // @return Status - The error code return
+  Status ComputeColMap() override;
+
  bool decode_;
  int64_t row_cnt_;
  int64_t buf_cnt_;
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc
@ -94,12 +94,6 @@ void GeneratorOp::Dealloc() noexcept {
 Status GeneratorOp::Init() {
  // Reset BufferID
  buffer_id_ = 0;
-  // Setup column names map (base class field)
-  if (column_name_id_map_.empty()) {
-    for (int i = 0; i < column_names_.size(); ++i) {
-      column_name_id_map_[column_names_[i]] = i;
-    }
-  }
  Status ret;
  {
    // Acquire Python GIL
@ -255,7 +249,19 @@ Status GeneratorOp::Reset() {
 // Visitor accept method for NodePass
 Status GeneratorOp::Accept(NodePass *p, bool *modified) {
  // Downcast shared pointer then call visitor
-  return p->RunOnNode(std::static_pointer_cast<GeneratorOp>(shared_from_this()), modified);
+  return p->RunOnNode(shared_from_base<GeneratorOp>(), modified);
+}
+
+Status GeneratorOp::ComputeColMap() {
+  // Setup column names map (base class field); the size is cast so the loop compares int with int
+  if (!column_name_id_map_.empty()) {
+    MS_LOG(WARNING) << "Column name map is already set!";
+    return Status::OK();
+  }
+  for (int i = 0; i < static_cast<int>(column_names_.size()); ++i) {
+    column_name_id_map_[column_names_[i]] = i;
+  }
+  return Status::OK();
 }
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.h
@ -150,6 +150,10 @@ class GeneratorOp : public PipelineOp {
  Status PyRowToTensorRow(py::object py_data, TensorRow *tensor_row);

  Status FillBuffer(TensorQTable *tt);
+
+  // Private function that maps each entry of column_names_ to its position, if the map is still empty.
+  // @return Status - The error code return
+  Status ComputeColMap() override;
 };

 #pragma GCC visibility pop
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc
@ -65,7 +65,7 @@ ImageFolderOp::ImageFolderOp(int32_t num_wkrs, int32_t rows_per_buffer, std::str
                             bool recursive, bool do_decode, const std::set<std::string> &exts,
                             const std::map<std::string, int32_t> &map, std::unique_ptr<DataSchema> data_schema,
                             std::shared_ptr<Sampler> sampler)
-    : ParallelOp(num_wkrs, queue_size),
+    : ParallelOp(num_wkrs, queue_size, std::move(sampler)),
      rows_per_buffer_(rows_per_buffer),
      folder_path_(file_dir),
      recursive_(recursive),
@ -73,15 +73,10 @@ ImageFolderOp::ImageFolderOp(int32_t num_wkrs, int32_t rows_per_buffer, std::str
      extensions_(exts),
      class_index_(map),
      data_schema_(std::move(data_schema)),
-      sampler_(std::move(sampler)),
      row_cnt_(0),
      buf_cnt_(0),
      sampler_ind_(0),
      dirname_offset_(0) {
-  // Set the column name map (base class field)
-  for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
-    column_name_id_map_[data_schema_->column(i).name()] = i;
-  }
  folder_name_queue_ = std::make_unique<Queue<std::string>>(num_wkrs * queue_size);
  image_name_queue_ = std::make_unique<Queue<FolderImagesPair>>(num_wkrs * queue_size);
  io_block_queues_.Init(num_workers_, queue_size);
@ -108,7 +103,7 @@ Status ImageFolderOp::PrescanMasterEntry(const std::string &filedir) {
  // following loop puts the 2 level of shuffles together into 1 vector
  for (size_t ind = 0; ind < v.size(); ++ind) {
    while (v[ind]->second.empty() == false) {
-      DS_ASSERT(!(v[ind]->first.empty()));  // make sure that v[ind]->first.substr(1) is not out of bound
+      MS_ASSERT(!(v[ind]->first.empty()));  // make sure that v[ind]->first.substr(1) is not out of bound
      v[ind]->second.front()->second = class_index_.empty() ? ind : class_index_[v[ind]->first.substr(1)];
      image_label_pairs_.push_back(v[ind]->second.front());
      v[ind]->second.pop();
@ -416,7 +411,19 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se
 // Visitor accept method for NodePass
 Status ImageFolderOp::Accept(NodePass *p, bool *modified) {
  // Downcast shared pointer then call visitor
-  return p->RunOnNode(std::static_pointer_cast<ImageFolderOp>(shared_from_this()), modified);
+  return p->RunOnNode(shared_from_base<ImageFolderOp>(), modified);
+}
+
+Status ImageFolderOp::ComputeColMap() {
+  // Fill the column name map (base class field) from the schema, unless it already has entries
+  if (!column_name_id_map_.empty()) {
+    MS_LOG(WARNING) << "Column name map is already set!";
+    return Status::OK();
+  }
+  for (int32_t col_id = 0; col_id < data_schema_->NumColumns(); ++col_id) {
+    column_name_id_map_[data_schema_->column(col_id).name()] = col_id;
+  }
+  return Status::OK();
 }
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.h
@ -248,6 +248,10 @@ class ImageFolderOp : public ParallelOp, public RandomAccessOp {
  // @return Status - The error code return
  Status Reset() override;

+  // Private function that maps each data schema column name to its index, if the map is still empty.
+  // @return Status - The error code return
+  Status ComputeColMap() override;
+
  int32_t rows_per_buffer_;
  std::string folder_path_;  // directory of image folder
  bool recursive_;
@ -255,7 +259,6 @@ class ImageFolderOp : public ParallelOp, public RandomAccessOp {
  std::set<std::string> extensions_;  // extensions allowed
  std::map<std::string, int32_t> class_index_;
  std::unique_ptr<DataSchema> data_schema_;
-  std::shared_ptr<Sampler> sampler_;
  int64_t row_cnt_;
  int64_t buf_cnt_;
  int64_t sampler_ind_;
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc
@ -64,7 +64,7 @@ Status ManifestOp::Builder::SanityCheck() {
 ManifestOp::ManifestOp(int32_t num_works, int32_t rows_per_buffer, std::string file, int32_t queue_size, bool decode,
                       const std::map<std::string, int32_t> &class_index, std::unique_ptr<DataSchema> data_schema,
                       std::shared_ptr<Sampler> sampler, std::string usage)
-    : ParallelOp(num_works, queue_size),
+    : ParallelOp(num_works, queue_size, std::move(sampler)),
      rows_per_buffer_(rows_per_buffer),
      io_block_pushed_(0),
      row_cnt_(0),
@ -72,14 +72,9 @@ ManifestOp::ManifestOp(int32_t num_works, int32_t rows_per_buffer, std::string f
      data_schema_(std::move(data_schema)),
      file_(file),
      class_index_(class_index),
-      sampler_(std::move(sampler)),
      decode_(decode),
      usage_(usage),
      buf_cnt_(0) {
-  // Set the column name map (base class field)
-  for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
-    column_name_id_map_[data_schema_->column(i).name()] = i;
-  }
  io_block_queues_.Init(num_workers_, queue_size);
  (void)std::transform(usage_.begin(), usage_.end(), usage_.begin(), ::tolower);
 }
@ -420,5 +415,17 @@ Status ManifestOp::GetClassIndexing(const std::string &file, const py::dict &dic

  return Status::OK();
 }
+
+Status ManifestOp::ComputeColMap() {
+  // Fill the column name map (base class field) from the schema, unless it already has entries
+  if (!column_name_id_map_.empty()) {
+    MS_LOG(WARNING) << "Column name map is already set!";
+    return Status::OK();
+  }
+  for (int32_t col_id = 0; col_id < data_schema_->NumColumns(); ++col_id) {
+    column_name_id_map_[data_schema_->column(col_id).name()] = col_id;
+  }
+  return Status::OK();
+}
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.h
@ -219,6 +219,10 @@ class ManifestOp : public ParallelOp, public RandomAccessOp {
  // @return Status - The error code return
  Status CountDatasetInfo();

+  // Private function that maps each data schema column name to its index, if the map is still empty.
+  // @return Status - The error code return
+  Status ComputeColMap() override;
+
  int32_t rows_per_buffer_;
  int64_t io_block_pushed_;
  int64_t row_cnt_;
@ -226,7 +230,6 @@ class ManifestOp : public ParallelOp, public RandomAccessOp {
  std::unique_ptr<DataSchema> data_schema_;
  std::string file_;  // file that store the information of images
  std::map<std::string, int32_t> class_index_;
-  std::shared_ptr<Sampler> sampler_;
  bool decode_;
  std::string usage_;
  int64_t buf_cnt_;
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc
@ -196,10 +196,6 @@ Status MindRecordOp::Init() {
    data_schema_ = std::move(tmp_schema);
  }

-  for (int i = 0; i < static_cast<int>(columns_to_load_.size()); i++) {
-    column_name_id_map_[columns_to_load_[i]] = i;
-  }
-
  return Status::OK();
 }

@ -500,7 +496,18 @@ Status MindRecordOp::CountTotalRows(const std::vector<std::string> dataset_path,
 // Visitor accept method for NodePass
 Status MindRecordOp::Accept(NodePass *p, bool *modified) {
  // Downcast shared pointer then call visitor
-  return p->RunOnNode(std::static_pointer_cast<MindRecordOp>(shared_from_this()), modified);
+  return p->RunOnNode(shared_from_base<MindRecordOp>(), modified);
+}
+
+Status MindRecordOp::ComputeColMap() {
+  if (!column_name_id_map_.empty()) {  // the map is only filled while it is still empty
+    MS_LOG(WARNING) << "Column name map is already set!";
+    return Status::OK();
+  }
+  for (int col_id = 0; col_id < static_cast<int>(columns_to_load_.size()); ++col_id) {
+    column_name_id_map_[columns_to_load_[col_id]] = col_id;
+  }
+  return Status::OK();
 }
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.h
@ -234,6 +234,10 @@ class MindRecordOp : public ParallelOp {

  Status FetchBlockBuffer(const int32_t &buffer_id);

+  // Private function that maps each entry of columns_to_load_ to its position, if the map is still empty.
+  // @return Status - The error code return
+  Status ComputeColMap() override;
+
  int32_t rows_per_buffer_;                                // The number of requested rows per buffer.
  std::vector<std::string> dataset_file_;                  // dataset files
  bool load_dataset_;                                      // load dataset from single file or not
--- a/Show More
+++ b/Show More