forked from mindspore-Ecosystem/mindspore
!41 Synchronization code to ms-incubator
Merge pull request !41 from changzherui/syn-code
commit 2dabcb9e59

@@ -5,8 +5,14 @@ include(${CMAKE_SOURCE_DIR}/cmake/options.cmake)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/modules/")

if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Werror -Wno-return-std-move -Wno-unused-private-field -Wno-unused-lambda-capture -Wno-sign-compare -Wno-overloaded-virtual -Wno-unneeded-internal-declaration -Wno-unused-variable -Wno-pessimizing-move -Wno-inconsistent-missing-override -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
else()
set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
endif()

set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -g2 -ggdb -fno-inline-functions -fno-omit-frame-pointer -Wl,--allow-shlib-undefined -D_LIBCPP_INLINE_VISIBILITY='' -D'_LIBCPP_EXTERN_TEMPLATE(...)=' -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2 -Wno-cpp")
set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I/usr/local/include -std=c++17 -Werror -Wall -Wno-deprecated-declarations -fPIC")
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

@@ -14,16 +20,31 @@ set(PYBIND11_CPP_STANDARD -std=c++17)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPTION_CXX_FLAGS}")

find_package(Threads)
find_package(Patch)
message(PATCH_EXECUTABLE = ${Patch_EXECUTABLE})

include(${CMAKE_SOURCE_DIR}/cmake/mind_expression.cmake)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/flatbuffers/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/flatbuffers/include/flatbuffers)

include(${CMAKE_SOURCE_DIR}/cmake/dependency_utils.cmake)
find_package(Python3 COMPONENTS Interpreter Development)
find_package(Python3 3.7 COMPONENTS Interpreter Development)
if(Python3_FOUND)
set(PYTHON_INCLUDE_DIRS "${Python3_INCLUDE_DIRS}")
set(PYTHON_LIBRARIES "${Python3_LIBRARIES}")
if (WIN32)
if (Python3_DIR)
message("Python3_DIR set already: " ${Python3_DIR})
else()
string(LENGTH ${PYTHON_LIBRARIES} PYTHON_LIBRARIES_LEN)
string(LENGTH "libpythonxx.a" Python3_NAME_LEN)
math(EXPR Python3_DIR_LEN ${PYTHON_LIBRARIES_LEN}-${Python3_NAME_LEN})
string(SUBSTRING ${Python3_LIBRARIES} 0 ${Python3_DIR_LEN} Python3_DIR)
message("Python3_DIR: " ${Python3_DIR})
endif()
link_directories(${Python3_DIR})
endif()
else()
find_python_package(py_inc py_lib)
set(PYTHON_INCLUDE_DIRS "${py_inc}")

@@ -55,3 +76,5 @@ add_subdirectory(mindspore/ccsrc)
if (ENABLE_TESTCASES)
add_subdirectory(tests)
endif()

include(cmake/package.cmake)
README.md
@@ -1,7 +1,7 @@
![MindSpore Logo](docs/MindSpore-logo.png "MindSpore logo")
============================================================

- [What is MindSpore?](#what-is-MindSpore)
- [What is MindSpore?](#what-is-mindspore)
- [Automatic Differentiation](#automatic-differentiation)
- [Automatic Parallel](#automatic-parallel)
- [Installation](#installation)
@@ -53,7 +53,7 @@ The goal of MindSpore automatic parallel is to build a training method that comb

<img src="docs/Automatic-parallel.png" alt="Automatic Parallel" width="600"/>

At present, MindSpore uses a fine-grained parallel strategy of splitting operators, that is, each operator in the figure is splited into a cluster to complete parallel operations. The splitting strategy during this period may be very complicated, but as a developer advocating Pythonic, you don't need to care about the underlying implementation, as long as the top-level API compute is efficient.
At present, MindSpore uses a fine-grained parallel strategy of splitting operators, that is, each operator in the figure is splitted into a cluster to complete parallel operations. The splitting strategy during this period may be very complicated, but as a developer advocating Pythonic, you don't need to care about the underlying implementation, as long as the top-level API compute is efficient.

## Installation
@@ -69,10 +69,11 @@ MindSpore offers build options across multiple backends:
| GPU CUDA 9.2 | Ubuntu-x86 | ✔️ |
| GPU CUDA 10.1 | Ubuntu-x86 | ✔️ |
| CPU | Ubuntu-x86 | ✔️ |
| | Windows-x86 | ✔️ |

For installation using pip, take `Ubuntu-x86` and `CPU` build version as an example:
For installation using `pip`, take `CPU` and `Ubuntu-x86` build version as an example:

1. Download whl from [MindSpore website](https://www.mindspore.cn/), and install the package.
1. Download whl from [MindSpore download page](https://www.mindspore.cn/versions/en), and install the package.

```
pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl
```
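After installing the wheel, a quick sanity check (not part of the quoted README, just a common verification step) is to import the package and print its version:

```
python -c "import mindspore; print(mindspore.__version__)"
```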
@@ -93,19 +94,69 @@ For installation using pip, take `Ubuntu-x86` and `CPU` build version as an exam
MindSpore docker image is hosted on [Docker Hub](https://hub.docker.com/r/mindspore),
currently the containerized build options are supported as follows:

| Hardware Platform | Docker Image URL |
| :---------------- | :--------------- |
| CPU | `mindspore/mindspore-cpu:0.1.0-alpha` |
| GPU CUDA 9.2 | `mindspore/mindspore-cuda9.2:0.1.0-alpha` |
| GPU CUDA 10.1 | `mindspore/mindspore-cuda10.1:0.1.0-alpha` |
| Ascend | <center>—</center> |
| Hardware Platform | Docker Image Repository | Tag | Description |
| :---------------- | :---------------------- | :-- | :---------- |
| CPU | `mindspore/mindspore-cpu` | `0.1.0-alpha` | Production environment with pre-installed MindSpore `0.1.0-alpha` CPU release. |
| | | `devel` | Development environment provided to build MindSpore (with `CPU` backend) from the source, refer to https://www.mindspore.cn/install/en for installation details. |
| | | `runtime` | Runtime environment provided to install MindSpore binary package with `CPU` backend. |
| GPU | `mindspore/mindspore-gpu` | `0.1.0-alpha` | Production environment with pre-installed MindSpore `0.1.0-alpha` GPU release. |
| | | `devel` | Development environment provided to build MindSpore (with `GPU CUDA10.1` backend) from the source, refer to https://www.mindspore.cn/install/en for installation details. |
| | | `runtime` | Runtime environment provided to install MindSpore binary package with `GPU` backend. |
| Ascend | <center>—</center> | <center>—</center> | Coming soon. |

Take `CPU` for example, you can directly pull the image using the below command:
```
docker pull mindspore/mindspore-cpu:0.1.0-alpha
```

* CPU

If anyone wants to learn more about the build process of MindSpore docker images,
For `CPU` backend, you can directly pull and run the image using the below command:
```
docker pull mindspore/mindspore-cpu:0.1.0-alpha
docker run -it mindspore/mindspore-cpu:0.1.0-alpha python -c 'import mindspore'
```

* GPU

For `GPU` backend, please make sure the `nvidia-container-toolkit` has been installed in advance, here are some install guidelines for `Ubuntu` users:
```
DISTRIBUTION=$(. /etc/os-release; echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | apt-key add -
curl -s -L https://nvidia.github.io/nvidia-docker/$DISTRIBUTION/nvidia-docker.list | tee /etc/apt/sources.list.d/nvidia-docker.list

sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit nvidia-docker2
sudo systemctl restart docker
```

Then you can pull and run the image using the below command:
```
docker pull mindspore/mindspore-gpu:0.1.0-alpha
docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.1.0-alpha /bin/bash
```
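Before running the Python check below, you can optionally confirm that the container sees the GPU at all; this command is not in the quoted README and assumes the `nvidia-container-toolkit` setup above succeeded:

```
docker run --runtime=nvidia --rm mindspore/mindspore-gpu:0.1.0-alpha nvidia-smi
```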

To test if the docker image works, please execute the python code below and check the output:
```python
import numpy as np
from mindspore import Tensor
from mindspore.ops import functional as F
import mindspore.context as context

context.set_context(device_target="GPU")
x = Tensor(np.ones([1,3,3,4]).astype(np.float32))
y = Tensor(np.ones([1,3,3,4]).astype(np.float32))
print(F.tensor_add(x, y))
```
```
[[[ 2. 2. 2. 2.],
[ 2. 2. 2. 2.],
[ 2. 2. 2. 2.]],

[[ 2. 2. 2. 2.],
[ 2. 2. 2. 2.],
[ 2. 2. 2. 2.]],

[[ 2. 2. 2. 2.],
[ 2. 2. 2. 2.],
[ 2. 2. 2. 2.]]]
```

If you want to learn more about the building process of MindSpore docker images,
please check out `docker` folder for the details.

## Quickstart
@@ -0,0 +1,54 @@
@rem Copyright 2020 Huawei Technologies Co., Ltd
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem http://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem ============================================================================
@echo off
@title mindspore_build

SET BASEPATH=%CD%
IF NOT EXIST %BASEPATH%/build (
md "build"
)

cd %BASEPATH%/build
SET BUILD_PATH=%CD%

IF NOT EXIST %BUILD_PATH%/mindspore (
md "mindspore"
)

cd %CD%/mindspore

cmake -DCMAKE_BUILD_TYPE=Release -DENABLE_CPU=ON -DENABLE_MINDDATA=ON -DUSE_GLOG=ON -G "CodeBlocks - MinGW Makefiles" ../..
IF NOT %errorlevel% == 0 (
goto run_fail
)

IF "%1%" == "" (
cmake --build . --target package -- -j6
) ELSE (
cmake --build . --target package -- -j%1%
)
IF NOT %errorlevel% == 0 (
goto run_fail
)

cd %BASEPATH%

goto run_eof

:run_fail
cd %BASEPATH%
echo "build fail."

:run_eof
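The batch script above builds the CPU package with MinGW and forwards its first argument to `cmake --build` as the parallel job count, defaulting to `-j6` when no argument is given. A hypothetical invocation with eight parallel jobs, assuming the script is run from the repository root, would be:

```
build.bat 8
```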
build.sh
@@ -16,7 +16,6 @@

set -e
BASEPATH=$(cd "$(dirname $0)"; pwd)
PROJECT_PATH="${BASEPATH}"
CUDA_PATH=""
CUDNN_PATH=""
export BUILD_PATH="${BASEPATH}/build/"

@@ -24,7 +23,7 @@ export BUILD_PATH="${BASEPATH}/build/"
usage()
{
echo "Usage:"
echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-s] [-b ge|cpu] [-m infer|train] \\"
echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge|cpu] [-m infer|train] \\"
echo " [-a on|off] [-g on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K]"
echo ""

@@ -36,7 +35,6 @@ usage()
echo " -t Run testcases switch, default on"
echo " -g Use glog to output log, default on"
echo " -h Print usage"
echo " -s Install or setup"
echo " -b Select other backend, available: \\"
echo " ge:graph engine, cpu"
echo " -m Select mode, available: infer, train, default is infer "
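As an illustration of the options listed in `usage()` (this invocation is not taken from the original document), a GPU build against CUDA 10.1 with 12 parallel jobs could be started with:

```
bash build.sh -e gpu -V 10.1 -j12
```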
@@ -77,7 +75,6 @@ checkopts()
VERBOSE=""
ENABLE_COVERAGE="off"
RUN_TESTCASES="off"
EXECUTE_SETUP="off"
ENABLE_BACKEND=""
TRAIN_MODE="INFER"
ENABLE_ASAN="off"

@@ -129,9 +126,6 @@ checkopts()
usage
exit 0
;;
s)
EXECUTE_SETUP="on"
;;
b)
if [[ "X$OPTARG" != "Xge" && "X$OPTARG" != "Xcpu" ]]; then
echo "Invalid value ${OPTARG} for option -b"

@@ -139,9 +133,6 @@ checkopts()
exit 1
fi
ENABLE_BACKEND=$(echo "$OPTARG" | tr '[a-z]' '[A-Z]')
if [[ "X$ENABLE_BACKEND" == "XGE" ]]; then
ENABLE_GE="on"
fi
if [[ "X$ENABLE_BACKEND" != "XCPU" ]]; then
ENABLE_CPU="on"
fi

@@ -297,7 +288,7 @@ build_mindspore()
if [[ "X$ENABLE_DUMPE2E" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON"
fi
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR^^}"
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}"
if [[ "X$ENABLE_MPI" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_MPI=ON"
fi

@@ -323,10 +314,10 @@ build_mindspore()
if [[ "X$INC_BUILD" = "Xoff" ]]; then
cmake ${CMAKE_ARGS} ../..
fi
make ${VERBOSE} -j$THREAD_NUM
if [[ "X$EXECUTE_SETUP" = "Xon" ]]; then
make install
if [[ -n "$VERBOSE" ]]; then
CMAKE_VERBOSE="--verbose"
fi
cmake --build . --target package ${CMAKE_VERBOSE} -j$THREAD_NUM
echo "success to build mindspore project!"
}

@@ -457,24 +448,7 @@ else
build_mindspore
fi

if [[ "X$INC_BUILD" = "Xoff" ]]; then
if [[ "X$ENABLE_GE" = "Xon" ]]; then
bash "${PROJECT_PATH}/package.sh" ge
elif [[ "X$ENABLE_GPU" = "Xon" ]]; then
bash "${PROJECT_PATH}/package.sh" ms gpu
elif [[ "X$ENABLE_D" = "Xon" ]]; then
bash "${PROJECT_PATH}/package.sh" ms ascend
elif [[ "X$ENABLE_CPU" = "Xon" ]]; then
bash "${PROJECT_PATH}/package.sh" ms cpu
else
bash "${PROJECT_PATH}/package.sh" debug
fi
fi

cp -rf ${BUILD_PATH}/package/mindspore/lib ${BUILD_PATH}/../mindspore
cp -rf ${BUILD_PATH}/package/mindspore/*.so ${BUILD_PATH}/../mindspore

if [[ -d "${BUILD_PATH}/package/build" ]]; then
rm -rf "${BUILD_PATH}/package/build"
fi
echo "---------------- mindspore: build end ----------------"
|
@ -9,6 +9,9 @@ if (NOT TARGET securec)
|
|||
set(_ms_tmp_CMAKE_C_FLAGS ${CMAKE_C_FLAGS})
|
||||
|
||||
set(CMAKE_C_FLAGS "${SECURE_CXX_FLAGS}")
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||
add_compile_definitions(SECUREC_ONLY_DECLARE_MEMSET)
|
||||
endif()
|
||||
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/securec ${CMAKE_BINARY_DIR}/securec)
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ${_ms_tmp_CMAKE_POSITION_INDEPENDENT_CODE})
|
||||
set(CMAKE_C_FLAGS ${_ms_tmp_CMAKE_C_FLAGS})
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
mindspore_add_pkg(dmlc_core
|
||||
mindspore_add_pkg(dmlc-core
|
||||
VER 0.3
|
||||
HEAD_ONLY ./
|
||||
URL https://github.com/dmlc/dmlc-core/archive/808f485387f9a03f78fa9f1159f387d0d91b7a28.zip
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
set(flatbuffers_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2")
|
||||
set(flatbuffers_CFLAGS "-D_FORTIFY_SOURCE=2 -O2")
|
||||
if (WIN32)
|
||||
set(flatbuffers_USE_STATIC_LIBS ON)
|
||||
endif()
|
||||
mindspore_add_pkg(flatbuffers
|
||||
VER 1.11.0
|
||||
LIBS flatbuffers
|
||||
|
|
|
@ -9,5 +9,5 @@ mindspore_add_pkg(gtest
|
|||
-DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON)
|
||||
include_directories(${gtest_INC})
|
||||
add_library(mindspore::gtest ALIAS gtest::gtest)
|
||||
file(COPY ${gtest_LIBPATH}/libgtest.so DESTINATION ${CMAKE_BINARY_DIR}/googletest/googlemock/gtest)
|
||||
file(COPY ${gtest_LIBPATH}/libgtest_main.so DESTINATION ${CMAKE_BINARY_DIR}/googletest/googlemock/gtest)
|
||||
file(COPY ${gtest_LIBPATH}/libgtest${CMAKE_SHARED_LIBRARY_SUFFIX} DESTINATION ${CMAKE_BINARY_DIR}/googletest/googlemock/gtest)
|
||||
file(COPY ${gtest_LIBPATH}/libgtest_main${CMAKE_SHARED_LIBRARY_SUFFIX} DESTINATION ${CMAKE_BINARY_DIR}/googletest/googlemock/gtest)
|
||||
|
|
|
@ -1,6 +1,10 @@
|
|||
|
||||
set(jpeg_turbo_USE_STATIC_LIBS ON)
|
||||
set(jpeg_turbo_CFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -D_FORTIFY_SOURCE=2 -O2")
|
||||
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||
set(jpeg_turbo_CFLAGS "-fstack-protector-all -Wno-uninitialized -Wno-unused-parameter -fPIC -D_FORTIFY_SOURCE=2 -O2")
|
||||
else()
|
||||
set(jpeg_turbo_CFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -D_FORTIFY_SOURCE=2 -O2")
|
||||
endif()
|
||||
|
||||
set(jpeg_turbo_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
|
||||
mindspore_add_pkg(jpeg_turbo
|
||||
VER 2.0.4
|
||||
|
|
|
@ -1,8 +1,18 @@
|
|||
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||
set(tiff_CXXFLAGS "-fstack-protector-all -Wno-uninitialized -Wno-unused-parameter -Wno-unused-result \
|
||||
-Wno-unused-but-set-variable -fPIC -D_FORTIFY_SOURCE=2 -O2")
|
||||
set(tiff_CFLAGS "-fstack-protector-all -Wno-uninitialized -Wno-unused-parameter -Wno-unused-result \
|
||||
-Wno-unused-but-set-variable -fPIC -D_FORTIFY_SOURCE=2 -O2")
|
||||
else()
|
||||
set(tiff_CXXFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -Wno-unused-result \
|
||||
-Wno-unused-but-set-variable -fPIC -D_FORTIFY_SOURCE=2 -O2")
|
||||
set(tiff_CFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -Wno-unused-result \
|
||||
-Wno-unused-but-set-variable -fPIC -D_FORTIFY_SOURCE=2 -O2")
|
||||
if (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
|
||||
set(tiff_CFLAGS "${tiff_CFLAGS} -Wno-int-to-pointer-cast -Wno-implicit-fallthrough -Wno-pointer-to-int-cast")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(tiff_CXXFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -Wno-unused-result \
|
||||
-Wno-unused-but-set-variable -fPIC -D_FORTIFY_SOURCE=2 -O2")
|
||||
set(tiff_CFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -Wno-unused-result \
|
||||
-Wno-unused-but-set-variable -fPIC -D_FORTIFY_SOURCE=2 -O2")
|
||||
set(tiff_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
|
||||
|
||||
mindspore_add_pkg(tiff
|
||||
|
|
|
@ -1,11 +1,22 @@
|
|||
set(onednn_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2")
|
||||
set(onednn_CFLAGS "-D_FORTIFY_SOURCE=2 -O2")
|
||||
mindspore_add_pkg(onednn
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||
mindspore_add_pkg(onednn
|
||||
VER 1.1.1
|
||||
LIBS dnnl mkldnn
|
||||
HEAD_ONLY ./
|
||||
RELEASE on
|
||||
URL https://github.com/oneapi-src/oneDNN/releases/download/v1.1.1/dnnl_win_1.1.1_cpu_vcomp.zip
|
||||
MD5 ecaab9ed549643067699c80e5cea1c23)
|
||||
else()
|
||||
mindspore_add_pkg(onednn
|
||||
VER 1.1.2
|
||||
LIBS dnnl mkldnn
|
||||
URL https://github.com/oneapi-src/oneDNN/archive/v1.1.2.tar.gz
|
||||
MD5 ab40d52230f3ad1d7a6f06ce0f6bc17a
|
||||
CMAKE_OPTION -DDNNL_ARCH_OPT_FLAGS='' -DDNNL_CPU_RUNTIME='SEQ' -DDNNL_BUILD_EXAMPLES=OFF -DDNNL_BUILD_TESTS=OFF)
|
||||
endif()
|
||||
|
||||
include_directories(${onednn_INC})
|
||||
add_library(mindspore::dnnl ALIAS onednn::dnnl)
|
||||
add_library(mindspore::mkldnn ALIAS onednn::mkldnn)
|
||||
|
|
|
@ -1,31 +1,76 @@
|
|||
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||
set(opencv_CXXFLAGS "-fstack-protector-all -Wno-uninitialized -Wno-unused-parameter -D_FORTIFY_SOURCE=2 -O2")
|
||||
set(opencv_CFLAGS "-fstack-protector-all -Wno-uninitialized -Wno-unused-parameter -D_FORTIFY_SOURCE=2 -O2")
|
||||
set(opencv_LDFLAGS "-Wl")
|
||||
elseif (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
|
||||
set(opencv_CXXFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -D_FORTIFY_SOURCE=2 -O2")
|
||||
set(opencv_CFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -D_FORTIFY_SOURCE=2 -O2")
|
||||
set(opencv_CXXFLAGS "${opencv_CXXFLAGS} -Wno-attributes -Wno-unknown-pragmas")
|
||||
set(opencv_CXXFLAGS "${opencv_CXXFLAGS} -Wno-unused-value -Wno-implicit-fallthrough")
|
||||
else()
|
||||
set(opencv_CXXFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -D_FORTIFY_SOURCE=2 -O2")
|
||||
set(opencv_CFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -D_FORTIFY_SOURCE=2 -O2")
|
||||
set(opencv_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
|
||||
endif()
|
||||
|
||||
set(opencv_CXXFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -D_FORTIFY_SOURCE=2 -O2")
|
||||
set(opencv_CFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -D_FORTIFY_SOURCE=2 -O2")
|
||||
set(opencv_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
|
||||
if (WIN32)
|
||||
mindspore_add_pkg(opencv
|
||||
VER 4.2.0
|
||||
LIBS libopencv_core420.dll.a libopencv_imgcodecs420.dll.a libopencv_imgproc420.dll.a
|
||||
LIB_PATH x64/mingw/lib
|
||||
URL https://github.com/opencv/opencv/archive/4.2.0.tar.gz
|
||||
MD5 e8cb208ce2723481408b604b480183b6
|
||||
CMAKE_OPTION -DCMAKE_BUILD_TYPE=Release -DWITH_PROTOBUF=OFF -DWITH_WEBP=OFF -DWITH_IPP=OFF -DWITH_ADE=OFF
|
||||
-DBUILD_ZLIB=ON
|
||||
-DBUILD_JPEG=ON
|
||||
-DBUILD_PNG=ON
|
||||
-DBUILD_OPENEXR=ON
|
||||
-DBUILD_TESTS=OFF
|
||||
-DBUILD_PERF_TESTS=OFF
|
||||
-DBUILD_opencv_apps=OFF
|
||||
-DCMAKE_SKIP_RPATH=TRUE
|
||||
-DBUILD_opencv_python3=OFF
|
||||
-DBUILD_opencv_videoio=OFF
|
||||
-DWITH_FFMPEG=OFF
|
||||
-DWITH_TIFF=ON
|
||||
-DBUILD_TIFF=OFF
|
||||
-DWITH_JASPER=OFF
|
||||
-DBUILD_JASPER=OFF
|
||||
-DTIFF_INCLUDE_DIR=${tiff_INC}
|
||||
-DTIFF_LIBRARY=${tiff_LIB})
|
||||
else()
|
||||
mindspore_add_pkg(opencv
|
||||
VER 4.2.0
|
||||
LIBS opencv_core opencv_imgcodecs opencv_imgproc
|
||||
URL https://github.com/opencv/opencv/archive/4.2.0.tar.gz
|
||||
MD5 e8cb208ce2723481408b604b480183b6
|
||||
CMAKE_OPTION -DCMAKE_BUILD_TYPE=Release -DWITH_PROTOBUF=OFF -DWITH_WEBP=OFF -DWITH_IPP=OFF -DWITH_ADE=OFF
|
||||
-DBUILD_ZLIB=ON
|
||||
-DBUILD_JPEG=ON
|
||||
-DBUILD_PNG=ON
|
||||
-DBUILD_OPENEXR=ON
|
||||
-DBUILD_TESTS=OFF
|
||||
-DBUILD_PERF_TESTS=OFF
|
||||
-DBUILD_opencv_apps=OFF
|
||||
-DCMAKE_SKIP_RPATH=TRUE
|
||||
-DBUILD_opencv_python3=OFF
|
||||
-DWITH_FFMPEG=OFF
|
||||
-DWITH_TIFF=ON
|
||||
-DBUILD_TIFF=OFF
|
||||
-DWITH_JASPER=OFF
|
||||
-DBUILD_JASPER=OFF
|
||||
-DTIFF_INCLUDE_DIR=${tiff_INC}
|
||||
-DTIFF_LIBRARY=${tiff_LIB})
|
||||
endif()
|
||||
|
||||
mindspore_add_pkg(opencv
|
||||
VER 4.2.0
|
||||
LIBS opencv_core opencv_imgcodecs opencv_imgproc
|
||||
URL https://github.com/opencv/opencv/archive/4.2.0.tar.gz
|
||||
MD5 e8cb208ce2723481408b604b480183b6
|
||||
CMAKE_OPTION -DCMAKE_BUILD_TYPE=Release -DWITH_PROTOBUF=OFF -DWITH_WEBP=OFF -DWITH_IPP=OFF -DWITH_ADE=OFF
|
||||
-DBUILD_ZLIB=ON
|
||||
-DBUILD_JPEG=ON
|
||||
-DBUILD_PNG=ON
|
||||
-DBUILD_OPENEXR=ON
|
||||
-DBUILD_TESTS=OFF
|
||||
-DBUILD_PERF_TESTS=OFF
|
||||
-DBUILD_opencv_apps=OFF
|
||||
-DCMAKE_SKIP_RPATH=TRUE
|
||||
-DBUILD_opencv_python3=OFF
|
||||
-DWITH_FFMPEG=OFF
|
||||
-DWITH_TIFF=ON
|
||||
-DBUILD_TIFF=OFF
|
||||
-DWITH_JASPER=OFF
|
||||
-DBUILD_JASPER=OFF
|
||||
-DTIFF_INCLUDE_DIR=${tiff_INC}
|
||||
-DTIFF_LIBRARY=${tiff_LIB})
|
||||
include_directories(${opencv_INC}/opencv4)
|
||||
add_library(mindspore::opencv_core ALIAS opencv::opencv_core)
|
||||
add_library(mindspore::opencv_imgcodecs ALIAS opencv::opencv_imgcodecs)
|
||||
add_library(mindspore::opencv_imgproc ALIAS opencv::opencv_imgproc)
|
||||
if (WIN32)
|
||||
include_directories(${opencv_INC})
|
||||
add_library(mindspore::opencv_core ALIAS opencv::libopencv_core420.dll.a)
|
||||
add_library(mindspore::opencv_imgcodecs ALIAS opencv::libopencv_imgcodecs420.dll.a)
|
||||
add_library(mindspore::opencv_imgproc ALIAS opencv::libopencv_imgproc420.dll.a)
|
||||
else()
|
||||
include_directories(${opencv_INC}/opencv4)
|
||||
add_library(mindspore::opencv_core ALIAS opencv::opencv_core)
|
||||
add_library(mindspore::opencv_imgcodecs ALIAS opencv::opencv_imgcodecs)
|
||||
add_library(mindspore::opencv_imgproc ALIAS opencv::opencv_imgproc)
|
||||
endif()
|
||||
|
|
|
@ -1,22 +1,27 @@
|
|||
mindspore_add_pkg(protobuf
|
||||
VER 3.8.0
|
||||
HEAD_ONLY ./
|
||||
URL https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz
|
||||
MD5 3d9e32700639618a4d2d342c99d4507a)
|
||||
|
||||
set(protobuf_BUILD_TESTS OFF CACHE BOOL "Disable protobuf test")
|
||||
set(protobuf_BUILD_SHARED_LIBS OFF CACHE BOOL "Gen shared library")
|
||||
set(protobuf_USE_STATIC_LIBS ON)
|
||||
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||
set(protobuf_CXXFLAGS "-fstack-protector-all -Wno-uninitialized -Wno-unused-parameter -fPIC -fvisibility=hidden -D_FORTIFY_SOURCE=2 -O2")
|
||||
else()
|
||||
set(protobuf_CXXFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fvisibility=hidden -D_FORTIFY_SOURCE=2 -O2")
|
||||
endif()
|
||||
set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
|
||||
set(_ms_tmp_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
|
||||
|
||||
set(CMAKE_CXX_FLAGS ${_ms_tmp_CMAKE_CXX_FLAGS})
|
||||
string(REPLACE " -Wall" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
string(REPLACE " -Werror" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
add_subdirectory(${protobuf_DIRPATH}/cmake ${protobuf_DIRPATH}/build)
|
||||
|
||||
set(CMAKE_CXX_FLAGS ${_ms_tmp_CMAKE_CXX_FLAGS})
|
||||
mindspore_add_pkg(protobuf
|
||||
VER 3.8.0
|
||||
LIBS protobuf
|
||||
EXE protoc
|
||||
URL https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz
|
||||
MD5 3d9e32700639618a4d2d342c99d4507a
|
||||
CMAKE_PATH cmake/
|
||||
CMAKE_OPTION -Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_BUILD_SHARED_LIBS=OFF)
|
||||
|
||||
set(PROTOBUF_LIBRARY protobuf::libprotobuf)
|
||||
include_directories(${protobuf_DIRPATH}/src)
|
||||
add_library(mindspore::protobuf ALIAS libprotobuf)
|
||||
include_directories(${protobuf_INC})
|
||||
add_library(mindspore::protobuf ALIAS protobuf::protobuf)
|
||||
set(CMAKE_CXX_FLAGS ${_ms_tmp_CMAKE_CXX_FLAGS})
|
||||
|
||||
function(ms_protobuf_generate c_var h_var)
|
||||
if(NOT ARGN)
|
||||
|
@ -72,22 +77,36 @@ function(ms_protobuf_generate_py c_var h_var py_var)
|
|||
list(APPEND ${c_var} "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.cc")
|
||||
list(APPEND ${h_var} "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.h")
|
||||
list(APPEND ${py_var} "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py")
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.cc"
|
||||
"${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.h"
|
||||
"${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py"
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/${rel_path}"
|
||||
COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file}
|
||||
COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file}
|
||||
COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file}
|
||||
COMMAND perl -pi -e "s/import (.+_pb2.*)/from . import \\1/" "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py"
|
||||
COMMAND cp "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py" "${PROJECT_SOURCE_DIR}/mindspore/train/"
|
||||
DEPENDS protobuf::protoc ${abs_file}
|
||||
COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM )
|
||||
if (WIN32)
|
||||
add_custom_command(
|
||||
OUTPUT "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.cc"
|
||||
"${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.h"
|
||||
"${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py"
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/${rel_path}"
|
||||
COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file}
|
||||
COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file}
|
||||
COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file}
|
||||
COMMAND perl -pi.bak -e "s/import (.+_pb2.*)/from . import \\1/" "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py"
|
||||
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py" "${PROJECT_SOURCE_DIR}/mindspore/train/"
|
||||
DEPENDS protobuf::protoc ${abs_file}
|
||||
COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM )
|
||||
else()
|
||||
add_custom_command(
|
||||
OUTPUT "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.cc"
|
||||
"${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.h"
|
||||
"${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py"
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/${rel_path}"
|
||||
COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file}
|
||||
COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file}
|
||||
COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file}
|
||||
COMMAND perl -pi -e "s/import (.+_pb2.*)/from . import \\1/" "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py"
|
||||
COMMAND cp "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py" "${PROJECT_SOURCE_DIR}/mindspore/train/"
|
||||
DEPENDS protobuf::protoc ${abs_file}
|
||||
COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM )
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
set_source_files_properties(${${c_var}} ${${h_var}} ${${py_var}} PROPERTIES GENERATED TRUE)
|
||||
set(${c_var} ${${c_var}} PARENT_SCOPE)
|
||||
set(${h_var} ${${h_var}} PARENT_SCOPE)
|
||||
|
|
|
@ -1,15 +1,30 @@
|
|||
if (WIN32)
|
||||
mindspore_add_pkg(sqlite
|
||||
VER 3.31.1
|
||||
LIBS sqlite3
|
||||
URL https://sqlite.org/2020/sqlite-amalgamation-3310100.zip
|
||||
MD5 2b7bfcdd97dc281903a9aee966213fe4
|
||||
PATCHES ${CMAKE_SOURCE_DIR}/third_party/patch/sqlite/sqlite.windows.patch001
|
||||
CMAKE_OPTION " "
|
||||
)
|
||||
|
||||
set(sqlite_USE_STATIC_LIBS ON)
|
||||
set(sqlite_CXXFLAGS)
|
||||
set(sqlite_CFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -D_FORTIFY_SOURCE=2 -O2")
|
||||
set(sqlite_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
|
||||
|
||||
mindspore_add_pkg(sqlite
|
||||
else ()
|
||||
set(sqlite_USE_STATIC_LIBS ON)
|
||||
set(sqlite_CXXFLAGS)
|
||||
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||
set(sqlite_CFLAGS "-fstack-protector-all -Wno-uninitialized -Wno-unused-parameter -fPIC -D_FORTIFY_SOURCE=2 -O2")
|
||||
else()
|
||||
set(sqlite_CFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -D_FORTIFY_SOURCE=2 -O2")
|
||||
endif()
|
||||
set(sqlite_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
|
||||
mindspore_add_pkg(sqlite
|
||||
VER 3.31.1
|
||||
LIBS sqlite3
|
||||
URL https://github.com/sqlite/sqlite/archive/version-3.31.1.tar.gz
|
||||
MD5 5f4e7b4016c15f4fb5855615279819da
|
||||
PATCHES ${CMAKE_SOURCE_DIR}/third_party/patch/sqlite/sqlite.patch001
|
||||
CONFIGURE_COMMAND ./configure --enable-shared=no --disable-tcl --disable-editline --enable-json1)
|
||||
endif ()
|
||||
|
||||
include_directories(${sqlite_INC})
|
||||
add_library(mindspore::sqlite ALIAS sqlite::sqlite3)
|
||||
add_library(mindspore::sqlite ALIAS sqlite::sqlite3)
|
||||
|
|
|
@ -1,8 +1,16 @@
|
|||
set(incubator_tvm_gpu_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2")
|
||||
set(incubator_tvm_gpu_CFLAGS "-D_FORTIFY_SOURCE=2 -O2")
|
||||
set(incubator_tvm_gpu_CFLAGS "-pipe -Wall -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -O2")
|
||||
set(incubator_tvm_gpu_CXXFLAGS "-std=c++11 -pipe -Wall -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -O2")
|
||||
set(USE_CUDA "ON")
|
||||
mindspore_add_pkg(incubator_tvm_gpu
|
||||
VER 0.6.0
|
||||
HEAD_ONLY ./
|
||||
LIBS tvm
|
||||
URL https://github.com/apache/incubator-tvm/archive/v0.6.0.tar.gz
|
||||
MD5 9cbbd32545a776023acabbba270449fe)
|
||||
|
||||
MD5 9cbbd32545a776023acabbba270449fe
|
||||
SUBMODULES ${dlpack_DIRPATH} ${dmlc-core_DIRPATH} ${rang_DIRPATH}
|
||||
SOURCEMODULES topi/python/topi python/tvm
|
||||
PATCHES ${CMAKE_SOURCE_DIR}/third_party/patch/incubator-tvm/find_library.patch
|
||||
${CMAKE_SOURCE_DIR}/third_party/patch/incubator-tvm/include.patch
|
||||
${CMAKE_SOURCE_DIR}/third_party/patch/incubator-tvm/src_pass.patch
|
||||
CMAKE_OPTION -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON)
|
||||
include_directories(${incubator_tvm_gpu_INC})
|
||||
add_library(mindspore::tvm ALIAS incubator_tvm_gpu::tvm)
|
||||
|
|
|
@ -1,6 +1,10 @@
|
|||
set(SECURE_CXX_FLAGS "")
|
||||
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
||||
set(SECURE_CXX_FLAGS "-fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack")
|
||||
if (WIN32)
|
||||
set(SECURE_CXX_FLAGS "-fstack-protector-all")
|
||||
else()
|
||||
set(SECURE_CXX_FLAGS "-fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack")
|
||||
endif()
|
||||
endif()
|
||||
set(_ms_tmp_CMAKE_CXX_FLAGS_F ${CMAKE_CXX_FLAGS})
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
|
||||
|
|
|
@ -19,7 +19,11 @@ option(ENABLE_MPI "enable mpi" OFF)
|
|||
option(ENABLE_AKG "enable akg" OFF)
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
set(OPTION_CXX_FLAGS "${OPTION_CXX_FLAGS} -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack")
|
||||
if (WIN32)
|
||||
set(OPTION_CXX_FLAGS "${OPTION_CXX_FLAGS} -fstack-protector-all")
|
||||
else()
|
||||
set(OPTION_CXX_FLAGS "${OPTION_CXX_FLAGS} -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Darwin")
|
||||
|
@ -41,8 +45,8 @@ endif()
|
|||
|
||||
if (DEBUG_MODE)
|
||||
set(CMAKE_BUILD_TYPE "Debug")
|
||||
else()
|
||||
add_compile_definitions(MEM_REUSE_DEBUG)
|
||||
else()
|
||||
set(CMAKE_BUILD_TYPE "Release")
|
||||
endif()
|
||||
|
||||
|
@ -60,6 +64,7 @@ endif()
|
|||
|
||||
if (ENABLE_GPU)
|
||||
set(ENABLE_GPUQUE ON)
|
||||
add_compile_definitions(ENABLE_GPU_COLLECTIVE)
|
||||
endif()
|
||||
|
||||
if (ENABLE_GE)
|
||||
|
@ -106,4 +111,4 @@ endif()
|
|||
|
||||
if(ENABLE_DUMP_E2E)
|
||||
add_compile_definitions(ENABLE_DUMP_E2E)
|
||||
endif()
|
||||
endif()
|
||||
|
|
|
@ -0,0 +1,217 @@
|
|||
# include dependency
|
||||
include(CMakePackageConfigHelpers)
|
||||
include(GNUInstallDirs)
|
||||
|
||||
# set package information
|
||||
set(CPACK_PACKAGE_NAME ${PROJECT_NAME})
|
||||
set(CPACK_GENERATOR "External")
|
||||
set(CPACK_EXTERNAL_PACKAGE_SCRIPT ${CMAKE_SOURCE_DIR}/cmake/package_script.cmake)
|
||||
set(CPACK_EXTERNAL_ENABLE_STAGING true)
|
||||
set(CPACK_TEMPORARY_PACKAGE_FILE_NAME ${CMAKE_SOURCE_DIR}/build/package/mindspore)
|
||||
set(CPACK_TEMPORARY_INSTALL_DIRECTORY ${CMAKE_SOURCE_DIR}/build/package/mindspore)
|
||||
if (ENABLE_GE)
|
||||
set(CPACK_MS_BACKEND "ge")
|
||||
set(CPACK_MS_PACKAGE_NAME "mindspore")
|
||||
elseif (ENABLE_GPU)
|
||||
set(CPACK_MS_BACKEND "ms")
|
||||
set(CPACK_MS_PACKAGE_NAME "mindspore-gpu")
|
||||
elseif (ENABLE_D)
|
||||
set(CPACK_MS_BACKEND "ms")
|
||||
set(CPACK_MS_PACKAGE_NAME "mindspore-ascend")
|
||||
elseif (ENABLE_CPU)
|
||||
set(CPACK_MS_BACKEND "ms")
|
||||
set(CPACK_MS_PACKAGE_NAME "mindspore")
|
||||
else ()
|
||||
set(CPACK_MS_BACKEND "debug")
|
||||
set(CPACK_MS_PACKAGE_NAME "mindspore")
|
||||
endif ()
|
||||
include(CPack)
|
||||
|
||||
# set install path
|
||||
set(INSTALL_LIB_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Installation directory for libraries")
|
||||
set(INSTALL_PY_DIR ".")
|
||||
set(INSTALL_BASE_DIR ".")
|
||||
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||
set(INSTALL_LIB_DIR ".")
|
||||
set(onednn_LIBPATH ${onednn_LIBPATH}/../bin/)
|
||||
set(glog_LIBPATH ${glog_LIBPATH}/../bin/)
|
||||
set(opencv_LIBPATH ${opencv_LIBPATH}/../bin/)
|
||||
set(jpeg_turbo_LIBPATH ${jpeg_turbo_LIBPATH}/../bin/)
|
||||
set(sqlite_LIBPATH ${sqlite_LIBPATH}/../bin/)
|
||||
else ()
|
||||
set(INSTALL_LIB_DIR "lib")
|
||||
endif ()
|
||||
|
||||
# set package files
|
||||
install(
|
||||
TARGETS _c_expression
|
||||
DESTINATION ${INSTALL_BASE_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
|
||||
install(
|
||||
TARGETS mindspore_gvar
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
|
||||
if (USE_GLOG)
|
||||
file(GLOB_RECURSE GLOG_LIB_LIST ${glog_LIBPATH}/libglog*)
|
||||
install(
|
||||
FILES ${GLOG_LIB_LIST}
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
endif ()
|
||||
|
||||
if (ENABLE_MINDDATA)
|
||||
install(
|
||||
TARGETS _c_dataengine _c_mindrecord
|
||||
DESTINATION ${INSTALL_BASE_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
|
||||
file(GLOB_RECURSE OPENCV_LIB_LIST
|
||||
${opencv_LIBPATH}/libopencv_core*
|
||||
${opencv_LIBPATH}/libopencv_imgcodecs*
|
||||
${opencv_LIBPATH}/libopencv_imgproc*
|
||||
)
|
||||
install(
|
||||
FILES ${OPENCV_LIB_LIST}
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
endif ()
|
||||
|
||||
if (ENABLE_CPU)
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
file(GLOB_RECURSE DNNL_LIB_LIST ${onednn_LIBPATH}/libdnnl${CMAKE_SHARED_LIBRARY_SUFFIX}*)
|
||||
elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
|
||||
file(GLOB_RECURSE DNNL_LIB_LIST ${onednn_LIBPATH}/libdnnl*${CMAKE_SHARED_LIBRARY_SUFFIX}*)
|
||||
elseif (CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||
file(GLOB_RECURSE DNNL_LIB_LIST ${onednn_LIBPATH}/dnnl.dll)
|
||||
endif ()
|
||||
install(
|
||||
FILES ${DNNL_LIB_LIST}
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
endif ()
|
||||
|
||||
if (ENABLE_GPU)
|
||||
if (ENABLE_MPI)
|
||||
install(
|
||||
TARGETS _ms_mpi
|
||||
DESTINATION ${INSTALL_BASE_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
install(
|
||||
TARGETS gpu_collective
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
endif ()
|
||||
install(
|
||||
TARGETS gpu_queue
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
endif ()
|
||||
|
||||
if (NOT ENABLE_GE)
|
||||
if (ENABLE_D)
|
||||
if (DEFINED ENV{ASCEND_CUSTOM_PATH})
|
||||
set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH})
|
||||
else ()
|
||||
set(ASCEND_PATH /usr/local/Ascend)
|
||||
endif ()
|
||||
set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common)
|
||||
|
||||
install(
|
||||
FILES
|
||||
${CMAKE_BINARY_DIR}/graphengine/src/common/graph/libgraph.so
|
||||
${CMAKE_BINARY_DIR}/graphengine/src/ge/common/libge_common.so
|
||||
${CMAKE_BINARY_DIR}/graphengine/src/ge/ge_runtime/libge_runtime.so
|
||||
${ASCEND_DRIVER_PATH}/libslog.so
|
||||
${ASCEND_DRIVER_PATH}/libc_sec.so
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
elseif (ENABLE_TESTCASES)
|
||||
install(
|
||||
FILES
|
||||
${CMAKE_BINARY_DIR}/graphengine/src/common/graph/libgraph.so
|
||||
${CMAKE_SOURCE_DIR}/graphengine/third_party/prebuild/${CMAKE_HOST_SYSTEM_PROCESSOR}/libslog.so
|
||||
${CMAKE_SOURCE_DIR}/graphengine/third_party/prebuild/${CMAKE_HOST_SYSTEM_PROCESSOR}/libc_sec.so
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||
get_filename_component(CXX_DIR ${CMAKE_CXX_COMPILER} PATH)
|
||||
file(GLOB CXX_LIB_LIST ${CXX_DIR}/*.dll)
|
||||
file(GLOB JPEG_LIB_LIST ${jpeg_turbo_LIBPATH}/*.dll)
|
||||
file(GLOB SQLITE_LIB_LIST ${sqlite_LIBPATH}/*.dll)
|
||||
install(
|
||||
FILES ${CXX_LIB_LIST} ${JPEG_LIB_LIST} ${SQLITE_LIB_LIST}
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
endif ()
|
||||
|
||||
# set python files
|
||||
file(GLOB MS_PY_LIST ${CMAKE_SOURCE_DIR}/mindspore/*.py)
|
||||
install(
|
||||
FILES ${MS_PY_LIST}
|
||||
DESTINATION ${INSTALL_PY_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
|
||||
install(
|
||||
DIRECTORY
|
||||
${CMAKE_SOURCE_DIR}/mindspore/nn
|
||||
${CMAKE_SOURCE_DIR}/mindspore/_extends
|
||||
${CMAKE_SOURCE_DIR}/mindspore/parallel
|
||||
${CMAKE_SOURCE_DIR}/mindspore/mindrecord
|
||||
${CMAKE_SOURCE_DIR}/mindspore/train
|
||||
${CMAKE_SOURCE_DIR}/mindspore/model_zoo
|
||||
${CMAKE_SOURCE_DIR}/mindspore/common
|
||||
${CMAKE_SOURCE_DIR}/mindspore/ops
|
||||
${CMAKE_SOURCE_DIR}/mindspore/communication
|
||||
DESTINATION ${INSTALL_PY_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
|
||||
if (ENABLE_GPU)
|
||||
install(
|
||||
DIRECTORY ${CMAKE_SOURCE_DIR}/mindspore/_akg
|
||||
DESTINATION ${INSTALL_PY_DIR}/../
|
||||
COMPONENT mindspore
|
||||
)
|
||||
if (EXISTS ${incubator_tvm_gpu_ROOT})
|
||||
file(GLOB_RECURSE GLOG_LIB_LIST ${incubator_tvm_gpu_LIBPATH}/lib*)
|
||||
install(
|
||||
FILES ${GLOG_LIB_LIST}
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
install(
|
||||
DIRECTORY
|
||||
${incubator_tvm_gpu_ROOT}/topi/python/topi
|
||||
${incubator_tvm_gpu_ROOT}/python/tvm
|
||||
DESTINATION ${INSTALL_PY_DIR}/../_akg
|
||||
COMPONENT mindspore
|
||||
)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (EXISTS ${CMAKE_SOURCE_DIR}/mindspore/dataset)
|
||||
install(
|
||||
DIRECTORY ${CMAKE_SOURCE_DIR}/mindspore/dataset
|
||||
DESTINATION ${INSTALL_PY_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
endif ()
|
|
@ -0,0 +1,90 @@
|
|||
# find exec
|
||||
find_package(Python3 3.7 COMPONENTS Interpreter Development)
|
||||
if (NOT Python3_FOUND)
|
||||
message("No python3 found.")
|
||||
return ()
|
||||
endif ()
|
||||
|
||||
set(PYTHON ${Python3_EXECUTABLE})
|
||||
set(PYTHON_VERSION ${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR})
|
||||
|
||||
find_package(Git)
|
||||
if (NOT GIT_FOUND)
|
||||
message("No git found.")
|
||||
return ()
|
||||
endif ()
|
||||
set(GIT ${GIT_EXECUTABLE})
|
||||
|
||||
# set path
|
||||
set(MS_ROOT_DIR ${CPACK_PACKAGE_DIRECTORY}/../../)
|
||||
set(MS_PACK_ROOT_DIR ${MS_ROOT_DIR}/build/package)
|
||||
|
||||
# set package file name
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
if (PYTHON_VERSION MATCHES "3.7")
|
||||
set(PY_TAGS "cp37-cp37m")
|
||||
else ()
|
||||
message("Could not find 'Python 3.7'")
|
||||
return()
|
||||
endif ()
|
||||
string(TOLOWER linux_${CMAKE_HOST_SYSTEM_PROCESSOR} PLATFORM_TAG)
|
||||
elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
|
||||
if (PYTHON_VERSION MATCHES "3.7")
|
||||
set(PY_TAGS "py37-none")
|
||||
else ()
|
||||
message("Could not find 'Python 3.7'")
|
||||
return()
|
||||
endif ()
|
||||
set(PLATFORM_TAG "any")
|
||||
elseif (CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||
if (PYTHON_VERSION MATCHES "3.7")
|
||||
set(PY_TAGS "cp37-cp37m")
|
||||
else ()
|
||||
message("Could not find 'Python 3.7'")
|
||||
return()
|
||||
endif ()
|
||||
set(PLATFORM_TAG "win_amd64")
|
||||
else ()
|
||||
message(FATAL_ERROR "other platform: ${CMAKE_SYSTEM_NAME}")
|
||||
endif ()
|
||||
|
||||
# get git commit id
|
||||
set(GIT_COMMIT_ID "")
|
||||
execute_process(
|
||||
COMMAND ${GIT} log --format='[sha1]:%h,[branch]:%d' -1
|
||||
OUTPUT_VARIABLE GIT_COMMIT_ID
|
||||
WORKING_DIRECTORY ${MS_ROOT_DIR}
|
||||
ERROR_QUIET)
|
||||
string(REPLACE " " "" GIT_COMMIT_ID ${GIT_COMMIT_ID})
|
||||
|
||||
set(ENV{BACKEND_POLICY} ${CPACK_MS_BACKEND})
|
||||
set(ENV{MS_PACKAGE_NAME} ${CPACK_MS_PACKAGE_NAME})
|
||||
set(ENV{COMMIT_ID} ${GIT_COMMIT_ID})
|
||||
|
||||
execute_process(
|
||||
COMMAND ${PYTHON} ${MS_ROOT_DIR}/setup.py "bdist_wheel"
|
||||
WORKING_DIRECTORY ${MS_PACK_ROOT_DIR}
|
||||
)
|
||||
|
||||
# finally
|
||||
set(PACKAGE_NAME ${CPACK_MS_PACKAGE_NAME})
|
||||
if (NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||
string(REPLACE "-" "_" PACKAGE_NAME ${PACKAGE_NAME})
|
||||
execute_process(
|
||||
COMMAND chmod -R 700 ${MS_PACK_ROOT_DIR}/mindspore/
|
||||
COMMAND chmod -R 700 ${MS_PACK_ROOT_DIR}/${PACKAGE_NAME}.egg-info/
|
||||
)
|
||||
endif ()
|
||||
|
||||
file(GLOB WHL_FILE ${MS_PACK_ROOT_DIR}/dist/*.whl)
|
||||
get_filename_component(ORIGIN_FILE_NAME ${WHL_FILE} NAME)
|
||||
string(REPLACE "-" ";" ORIGIN_FILE_NAME ${ORIGIN_FILE_NAME})
|
||||
list(GET ORIGIN_FILE_NAME 1 VERSION)
|
||||
set(NEW_FILE_NAME ${PACKAGE_NAME}-${VERSION}-${PY_TAGS}-${PLATFORM_TAG}.whl)
|
||||
file(RENAME ${WHL_FILE} ${MS_PACK_ROOT_DIR}/${NEW_FILE_NAME})
|
||||
file(REMOVE_RECURSE ${MS_ROOT_DIR}/output)
|
||||
file(MAKE_DIRECTORY ${MS_ROOT_DIR}/output)
|
||||
file(COPY ${MS_PACK_ROOT_DIR}/${NEW_FILE_NAME} DESTINATION ${MS_ROOT_DIR}/output/)
|
||||
|
||||
file(SHA256 ${MS_ROOT_DIR}/output/${NEW_FILE_NAME} SHA256_VAR)
|
||||
file(WRITE ${MS_ROOT_DIR}/output/${NEW_FILE_NAME}.sha256 ${SHA256_VAR} " " ${NEW_FILE_NAME})
|
|
@ -1,6 +1,10 @@
|
|||
include(FetchContent)
|
||||
set(FETCHCONTENT_QUIET OFF)
|
||||
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Windows" AND ${CMAKE_VERSION} VERSION_GREATER_EQUAL 3.17.0)
|
||||
set(CMAKE_FIND_LIBRARY_SUFFIXES .dll ${CMAKE_FIND_LIBRARY_SUFFIXES})
|
||||
endif ()
|
||||
|
||||
function(mindspore_add_submodule_obj des_submodule_objs sub_dir submodule_name_obj)
|
||||
|
||||
add_subdirectory(${sub_dir})
|
||||
|
@ -103,7 +107,7 @@ function(__download_pkg_with_git pkg_name pkg_url pkg_git_commit pkg_md5)
|
|||
endfunction()
|
||||
|
||||
|
||||
function(__find_pkg_then_add_target pkg_name pkg_exe)
|
||||
function(__find_pkg_then_add_target pkg_name pkg_exe lib_path)
|
||||
|
||||
unset(${pkg_name}_LIBS)
|
||||
|
||||
|
@ -129,15 +133,24 @@ function(__find_pkg_then_add_target pkg_name pkg_exe)
|
|||
set(_LIB_TYPE STATIC)
|
||||
endif ()
|
||||
set(${_LIB_NAME}_LIB ${_LIB_NAME}_LIB-NOTFOUND)
|
||||
find_library(${_LIB_NAME}_LIB ${_LIB_SEARCH_NAME} PATHS ${${pkg_name}_BASE_DIR}/lib NO_DEFAULT_PATH)
|
||||
find_library(${_LIB_NAME}_LIB ${_LIB_SEARCH_NAME} PATHS ${${pkg_name}_BASE_DIR}/${lib_path} NO_DEFAULT_PATH)
|
||||
|
||||
if(NOT ${_LIB_NAME}_LIB)
|
||||
return()
|
||||
endif()
|
||||
|
||||
add_library(${pkg_name}::${_LIB_NAME} ${_LIB_TYPE} IMPORTED GLOBAL)
|
||||
set_target_properties(${pkg_name}::${_LIB_NAME} PROPERTIES
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${${pkg_name}_BASE_DIR}/include"
|
||||
IMPORTED_LOCATION ${${_LIB_NAME}_LIB}
|
||||
)
|
||||
if (WIN32 AND ${_LIB_TYPE} STREQUAL "SHARED")
|
||||
set_target_properties(${pkg_name}::${_LIB_NAME} PROPERTIES IMPORTED_IMPLIB_RELEASE ${${_LIB_NAME}_LIB})
|
||||
else()
|
||||
set_target_properties(${pkg_name}::${_LIB_NAME} PROPERTIES IMPORTED_LOCATION ${${_LIB_NAME}_LIB})
|
||||
endif()
|
||||
|
||||
if (EXISTS ${${pkg_name}_BASE_DIR}/include)
|
||||
set_target_properties(${pkg_name}::${_LIB_NAME} PROPERTIES
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${${pkg_name}_BASE_DIR}/include")
|
||||
endif ()
|
||||
|
||||
list(APPEND ${pkg_name}_LIBS ${pkg_name}::${_LIB_NAME})
|
||||
message("found ${${_LIB_NAME}_LIB}")
|
||||
STRING( REGEX REPLACE "(.+)/(.+)" "\\1" LIBPATH ${${_LIB_NAME}_LIB})
|
||||
|
@ -192,10 +205,18 @@ set(MS_FIND_NO_DEFAULT_PATH ${MS_FIND_NO_DEFAULT_PATH} PARENT_SCOPE)
|
|||
function(mindspore_add_pkg pkg_name )
|
||||
|
||||
set(options )
|
||||
set(oneValueArgs URL MD5 GIT_REPOSITORY GIT_TAG VER EXE DIR HEAD_ONLY)
|
||||
set(multiValueArgs CMAKE_OPTION LIBS PRE_CONFIGURE_COMMAND CONFIGURE_COMMAND BUILD_OPTION INSTALL_INCS INSTALL_LIBS PATCHES)
|
||||
set(oneValueArgs URL MD5 GIT_REPOSITORY GIT_TAG VER EXE DIR HEAD_ONLY CMAKE_PATH RELEASE LIB_PATH)
|
||||
set(multiValueArgs CMAKE_OPTION LIBS PRE_CONFIGURE_COMMAND CONFIGURE_COMMAND BUILD_OPTION INSTALL_INCS INSTALL_LIBS PATCHES SUBMODULES SOURCEMODULES)
|
||||
cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} )
|
||||
|
||||
if (NOT PKG_LIB_PATH)
|
||||
set(PKG_LIB_PATH lib)
|
||||
endif ()
|
||||
|
||||
if(NOT PKG_EXE)
|
||||
set(PKG_EXE 0)
|
||||
endif()
|
||||
|
||||
set(__FIND_PKG_NAME ${pkg_name})
|
||||
string(TOLOWER ${pkg_name} pkg_name)
|
||||
message("pkg name:${__FIND_PKG_NAME},${pkg_name}")
|
||||
|
@ -223,18 +244,17 @@ function(mindspore_add_pkg pkg_name )
|
|||
set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/${PKG_HEAD_ONLY} PARENT_SCOPE)
|
||||
add_library(${pkg_name} INTERFACE)
|
||||
target_include_directories(${pkg_name} INTERFACE ${${pkg_name}_INC})
|
||||
if (${PKG_RELEASE})
|
||||
__find_pkg_then_add_target(${pkg_name} ${PKG_EXE} ${PKG_LIB_PATH} ${PKG_LIBS})
|
||||
endif ()
|
||||
return()
|
||||
endif ()
|
||||
|
||||
if(NOT PKG_EXE)
|
||||
set(PKG_EXE 0)
|
||||
endif()
|
||||
|
||||
set(${__FIND_PKG_NAME}_ROOT ${${pkg_name}_BASE_DIR})
|
||||
set(${__FIND_PKG_NAME}_ROOT ${${pkg_name}_BASE_DIR} PARENT_SCOPE)
|
||||
|
||||
if (PKG_LIBS)
|
||||
__find_pkg_then_add_target(${pkg_name} ${PKG_EXE} ${PKG_LIBS})
|
||||
__find_pkg_then_add_target(${pkg_name} ${PKG_EXE} ${PKG_LIB_PATH} ${PKG_LIBS})
|
||||
if(${pkg_name}_LIBS)
|
||||
set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE)
|
||||
message("Found libs: ${${pkg_name}_LIBS}")
|
||||
|
@ -250,11 +270,21 @@ function(mindspore_add_pkg pkg_name )
|
|||
endif ()
|
||||
|
||||
if (NOT PKG_DIR)
|
||||
if (PKG_GIT_REPOSITORY)
|
||||
__download_pkg_with_git(${pkg_name} ${PKG_GIT_REPOSITORY} ${PKG_GIT_TAG} ${PKG_MD5})
|
||||
else()
|
||||
if (PKG_GIT_REPOSITORY)
|
||||
__download_pkg_with_git(${pkg_name} ${PKG_GIT_REPOSITORY} ${PKG_GIT_TAG} ${PKG_MD5})
|
||||
else()
|
||||
__download_pkg(${pkg_name} ${PKG_URL} ${PKG_MD5})
|
||||
endif()
|
||||
endif()
|
||||
foreach(_SUBMODULE_FILE ${PKG_SUBMODULES})
|
||||
STRING( REGEX REPLACE "(.+)_(.+)" "\\1" _SUBMODEPATH ${_SUBMODULE_FILE})
|
||||
STRING( REGEX REPLACE "(.+)/(.+)" "\\2" _SUBMODENAME ${_SUBMODEPATH})
|
||||
file(GLOB ${pkg_name}_INSTALL_SUBMODULE ${_SUBMODULE_FILE}/*)
|
||||
file(COPY ${${pkg_name}_INSTALL_SUBMODULE} DESTINATION ${${pkg_name}_SOURCE_DIR}/3rdparty/${_SUBMODENAME})
|
||||
endforeach (_SUBMODULE_FILE)
|
||||
foreach(_SOURCE_DIR ${PKG_SOURCEMODULES})
|
||||
file(GLOB ${pkg_name}_INSTALL_SOURCE ${${pkg_name}_SOURCE_DIR}/${_SOURCE_DIR}/*)
|
||||
file(COPY ${${pkg_name}_INSTALL_SOURCE} DESTINATION ${${pkg_name}_BASE_DIR}/${_SOURCE_DIR}/)
|
||||
endforeach (_SUBMODULE_FILE)
|
||||
else()
|
||||
set(${pkg_name}_SOURCE_DIR ${PKG_DIR})
|
||||
endif ()
|
||||
|
@ -262,12 +292,16 @@ function(mindspore_add_pkg pkg_name )
|
|||
message("${pkg_name}_SOURCE_DIR : ${${pkg_name}_SOURCE_DIR}")
|
||||
|
||||
foreach(_PATCH_FILE ${PKG_PATCHES})
|
||||
message("patching ${${pkg_name}_SOURCE_DIR} -p1 < ${_PATCH_FILE}")
|
||||
execute_process(COMMAND patch -p1 INPUT_FILE ${_PATCH_FILE}
|
||||
get_filename_component(_PATCH_FILE_NAME ${_PATCH_FILE} NAME)
|
||||
set(_LF_PATCH_FILE ${CMAKE_BINARY_DIR}/_ms_patch/${_PATCH_FILE_NAME})
|
||||
configure_file(${_PATCH_FILE} ${_LF_PATCH_FILE} NEWLINE_STYLE LF)
|
||||
|
||||
message("patching ${${pkg_name}_SOURCE_DIR} -p1 < ${_LF_PATCH_FILE}")
|
||||
execute_process(COMMAND ${Patch_EXECUTABLE} -p1 INPUT_FILE ${_LF_PATCH_FILE}
|
||||
WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}
|
||||
RESULT_VARIABLE Result)
|
||||
if(NOT Result EQUAL "0")
|
||||
message(FATAL_ERROR "Failed patch: ${_PATCH_FILE}")
|
||||
message(FATAL_ERROR "Failed patch: ${_LF_PATCH_FILE}")
|
||||
endif()
|
||||
endforeach(_PATCH_FILE)
|
||||
|
||||
|
@ -281,8 +315,10 @@ function(mindspore_add_pkg pkg_name )
|
|||
file(GLOB ${pkg_name}_SOURCE_SUBDIRS ${${pkg_name}_SOURCE_DIR}/*)
|
||||
file(COPY ${${pkg_name}_SOURCE_SUBDIRS} DESTINATION ${${pkg_name}_BASE_DIR})
|
||||
set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/${PKG_HEAD_ONLY} PARENT_SCOPE)
|
||||
add_library(${pkg_name} INTERFACE)
|
||||
target_include_directories(${pkg_name} INTERFACE ${${pkg_name}_INC})
|
||||
if (NOT PKG_RELEASE)
|
||||
add_library(${pkg_name} INTERFACE)
|
||||
target_include_directories(${pkg_name} INTERFACE ${${pkg_name}_INC})
|
||||
endif ()
|
||||
|
||||
elseif (PKG_CMAKE_OPTION)
|
||||
# in cmake
|
||||
|
@ -304,7 +340,7 @@ function(mindspore_add_pkg pkg_name )
|
|||
|
||||
__exec_cmd(COMMAND ${CMAKE_COMMAND} ${PKG_CMAKE_OPTION} -G ${CMAKE_GENERATOR}
|
||||
${${pkg_name}_CMAKE_CFLAGS} ${${pkg_name}_CMAKE_CXXFLAGS} ${${pkg_name}_CMAKE_LDFLAGS}
|
||||
-DCMAKE_INSTALL_PREFIX=${${pkg_name}_BASE_DIR} ..
|
||||
-DCMAKE_INSTALL_PREFIX=${${pkg_name}_BASE_DIR} ${${pkg_name}_SOURCE_DIR}/${PKG_CMAKE_PATH}
|
||||
WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build)
|
||||
|
||||
__exec_cmd(COMMAND ${CMAKE_COMMAND} --build . --target install -- -j${THNUM}
|
||||
|
@ -353,7 +389,7 @@ function(mindspore_add_pkg pkg_name )
|
|||
endif()
|
||||
|
||||
if (PKG_LIBS)
|
||||
__find_pkg_then_add_target(${pkg_name} ${PKG_EXE} ${PKG_LIBS})
|
||||
__find_pkg_then_add_target(${pkg_name} ${PKG_EXE} ${PKG_LIB_PATH} ${PKG_LIBS})
|
||||
set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE)
|
||||
if(NOT ${pkg_name}_LIBS)
|
||||
message(FATAL_ERROR "Can not find pkg: ${pkg_name}")
|
||||
|
|
|
@@ -7,17 +7,11 @@ This folder hosts all the `Dockerfile` to build MindSpore container images with
* CPU

```
cd mindspore-cpu && docker build . -t mindspore/mindspore-cpu:0.1.0-alpha
cd mindspore-cpu/0.1.0-alpha && docker build . -t mindspore/mindspore-cpu:0.1.0-alpha
```

* GPU (CUDA 9.2)
* GPU

```
cd mindspore-cuda9.2 && docker build . -t mindspore/mindspore-cuda9.2:0.1.0-alpha
```

* GPU (CUDA 10.1)

```
cd mindspore-cuda10.1 && docker build . -t mindspore/mindspore-cuda10.1:0.1.0-alpha
cd mindspore-gpu/0.1.0-alpha && docker build . -t mindspore/mindspore-gpu:0.1.0-alpha
```
|
@ -1,11 +1,10 @@
|
|||
FROM nvidia/cuda:9.2-cudnn7-devel-ubuntu18.04
|
||||
FROM ubuntu:18.04
|
||||
|
||||
MAINTAINER leonwanghui <leon.wanghui@huawei.com>
|
||||
|
||||
# Set env
|
||||
ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
|
||||
ENV CMAKE_ROOT_PATH /usr/local/cmake-3.14.1
|
||||
ENV PATH ${PYTHON_ROOT_PATH}/bin:${CMAKE_ROOT_PATH}/bin:/usr/local/bin:$PATH
|
||||
ENV PATH /usr/local/bin:$PATH
|
||||
|
||||
# Install base tools
|
||||
RUN apt update \
|
||||
|
@ -64,20 +63,5 @@ RUN mkdir -pv /root/.pip \
|
|||
&& echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \
|
||||
&& echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf
|
||||
|
||||
# Install pip package
|
||||
RUN pip install --no-cache-dir \
|
||||
numpy \
|
||||
wheel \
|
||||
nose \
|
||||
pytest \
|
||||
pytest-xdist
|
||||
|
||||
# Install cmake (v3.14.1)
|
||||
RUN cd /tmp \
|
||||
&& wget https://github.com/Kitware/CMake/releases/download/v3.14.1/cmake-3.14.1-Linux-x86_64.sh \
|
||||
&& mkdir -p ${CMAKE_ROOT_PATH} \
|
||||
&& bash ./cmake-3.14.1-Linux-x86_64.sh --prefix=${CMAKE_ROOT_PATH} --exclude-subdir --skip-license \
|
||||
&& rm -f /tmp/cmake-3.14.1-Linux-x86_64.sh
|
||||
|
||||
# Install MindSpore cuda-9.2 whl package
|
||||
RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/gpu/cuda-9.2/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl
|
||||
# Install MindSpore cpu whl package
|
||||
RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl
|
|
@ -62,15 +62,8 @@ RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
|
|||
RUN mkdir -pv /root/.pip \
|
||||
&& echo "[global]" > /root/.pip/pip.conf \
|
||||
&& echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \
|
||||
&& echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf
|
||||
|
||||
# Install pip package
|
||||
RUN pip install --no-cache-dir \
|
||||
numpy \
|
||||
wheel \
|
||||
nose \
|
||||
pytest \
|
||||
pytest-xdist
|
||||
&& echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf \
|
||||
&& pip install --no-cache-dir wheel
|
||||
|
||||
# Install cmake (v3.14.1)
|
||||
RUN cd /tmp \
|
||||
|
@ -78,6 +71,3 @@ RUN cd /tmp \
|
|||
&& mkdir -p ${CMAKE_ROOT_PATH} \
|
||||
&& bash ./cmake-3.14.1-Linux-x86_64.sh --prefix=${CMAKE_ROOT_PATH} --exclude-subdir --skip-license \
|
||||
&& rm -f /tmp/cmake-3.14.1-Linux-x86_64.sh
|
||||
|
||||
# Install MindSpore cpu whl package
|
||||
RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl
|
|
@@ -0,0 +1,64 @@
FROM ubuntu:18.04

MAINTAINER leonwanghui <leon.wanghui@huawei.com>

# Set env
ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
ENV PATH /usr/local/bin:$PATH

# Install base tools
RUN apt update \
    && DEBIAN_FRONTEND=noninteractive apt install -y \
    vim \
    wget \
    curl \
    xz-utils \
    net-tools \
    openssh-client \
    git \
    ntpdate \
    tzdata \
    tcl \
    sudo \
    bash-completion

# Install compile tools
RUN DEBIAN_FRONTEND=noninteractive apt install -y \
    gcc \
    g++ \
    zlibc \
    make \
    libgmp-dev \
    patch \
    autoconf \
    libtool \
    automake \
    flex

# Set bash
RUN echo "dash dash/sh boolean false" | debconf-set-selections
RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash

# Install python (v3.7.5)
RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
    libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \
    && cd /tmp \
    && wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \
    && tar -xvf v3.7.5.tar.gz \
    && cd /tmp/cpython-3.7.5 \
    && mkdir -p ${PYTHON_ROOT_PATH} \
    && ./configure --prefix=${PYTHON_ROOT_PATH} \
    && make -j4 \
    && make install -j4 \
    && rm -f /usr/local/bin/python \
    && rm -f /usr/local/bin/pip \
    && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
    && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
    && rm -rf /tmp/cpython-3.7.5 \
    && rm -f /tmp/v3.7.5.tar.gz

# Set pip source
RUN mkdir -pv /root/.pip \
    && echo "[global]" > /root/.pip/pip.conf \
    && echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \
    && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf

@ -0,0 +1,83 @@
|
|||
FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04
|
||||
|
||||
MAINTAINER leonwanghui <leon.wanghui@huawei.com>
|
||||
|
||||
# Set env
|
||||
ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
|
||||
ENV OMPI_ROOT_PATH /usr/local/openmpi-3.1.5
|
||||
ENV PATH ${OMPI_ROOT_PATH}/bin:/usr/local/bin:$PATH
|
||||
ENV LD_LIBRARY_PATH ${OMPI_ROOT_PATH}/lib:$LD_LIBRARY_PATH
|
||||
|
||||
# Install base tools
|
||||
RUN apt update \
|
||||
&& DEBIAN_FRONTEND=noninteractive apt install -y \
|
||||
vim \
|
||||
wget \
|
||||
curl \
|
||||
xz-utils \
|
||||
net-tools \
|
||||
openssh-client \
|
||||
git \
|
||||
ntpdate \
|
||||
tzdata \
|
||||
tcl \
|
||||
sudo \
|
||||
bash-completion
|
||||
|
||||
# Install compile tools
|
||||
RUN DEBIAN_FRONTEND=noninteractive apt install -y \
|
||||
gcc \
|
||||
g++ \
|
||||
zlibc \
|
||||
make \
|
||||
libgmp-dev \
|
||||
patch \
|
||||
autoconf \
|
||||
libtool \
|
||||
automake \
|
||||
flex \
|
||||
libnccl2=2.4.8-1+cuda10.1 \
|
||||
libnccl-dev=2.4.8-1+cuda10.1
|
||||
|
||||
# Set bash
|
||||
RUN echo "dash dash/sh boolean false" | debconf-set-selections
|
||||
RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash
|
||||
|
||||
# Install python (v3.7.5)
|
||||
RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
|
||||
libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \
|
||||
&& cd /tmp \
|
||||
&& wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \
|
||||
&& tar -xvf v3.7.5.tar.gz \
|
||||
&& cd /tmp/cpython-3.7.5 \
|
||||
&& mkdir -p ${PYTHON_ROOT_PATH} \
|
||||
&& ./configure --prefix=${PYTHON_ROOT_PATH} \
|
||||
&& make -j4 \
|
||||
&& make install -j4 \
|
||||
&& rm -f /usr/local/bin/python \
|
||||
&& rm -f /usr/local/bin/pip \
|
||||
&& ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
|
||||
&& ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
|
||||
&& rm -rf /tmp/cpython-3.7.5 \
|
||||
&& rm -f /tmp/v3.7.5.tar.gz
|
||||
|
||||
# Set pip source
|
||||
RUN mkdir -pv /root/.pip \
|
||||
&& echo "[global]" > /root/.pip/pip.conf \
|
||||
&& echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \
|
||||
&& echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf
|
||||
|
||||
# Install openmpi (v3.1.5)
|
||||
RUN cd /tmp \
|
||||
&& wget https://download.open-mpi.org/release/open-mpi/v3.1/openmpi-3.1.5.tar.gz \
|
||||
&& tar -xvf openmpi-3.1.5.tar.gz \
|
||||
&& cd /tmp/openmpi-3.1.5 \
|
||||
&& mkdir -p ${OMPI_ROOT_PATH} \
|
||||
&& ./configure --prefix=${OMPI_ROOT_PATH} \
|
||||
&& make -j4 \
|
||||
&& make install -j4 \
|
||||
&& rm -rf /tmp/openmpi-3.1.5 \
|
||||
&& rm -f /tmp/openmpi-3.1.5.tar.gz
|
||||
|
||||
# Install MindSpore cuda-10.1 whl package
|
||||
RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/gpu/cuda-10.1/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl
|
|
@@ -5,7 +5,7 @@ MAINTAINER leonwanghui <leon.wanghui@huawei.com>
# Set env
ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
ENV CMAKE_ROOT_PATH /usr/local/cmake-3.14.1
ENV PATH ${PYTHON_ROOT_PATH}/bin:${CMAKE_ROOT_PATH}/bin:/usr/local/bin:$PATH
ENV PATH ${CMAKE_ROOT_PATH}/bin:/usr/local/bin:$PATH

# Install base tools
RUN apt update \

@@ -36,6 +36,9 @@ RUN DEBIAN_FRONTEND=noninteractive apt install -y \
    automake \
    flex

# Configure cuDNN (v7.6.5)
RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5 /usr/local/cuda/lib64/libcudnn.so

# Set bash
RUN echo "dash dash/sh boolean false" | debconf-set-selections
RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash

@@ -62,15 +65,8 @@ RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
RUN mkdir -pv /root/.pip \
    && echo "[global]" > /root/.pip/pip.conf \
    && echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \
    && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf

# Install pip package
RUN pip install --no-cache-dir \
    numpy \
    wheel \
    nose \
    pytest \
    pytest-xdist
    && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf \
    && pip install --no-cache-dir wheel

# Install cmake (v3.14.1)
RUN cd /tmp \

@@ -78,6 +74,3 @@ RUN cd /tmp \
    && mkdir -p ${CMAKE_ROOT_PATH} \
    && bash ./cmake-3.14.1-Linux-x86_64.sh --prefix=${CMAKE_ROOT_PATH} --exclude-subdir --skip-license \
    && rm -f /tmp/cmake-3.14.1-Linux-x86_64.sh

# Install MindSpore cuda-10.1 whl package
RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/gpu/cuda-10.1/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl

@ -0,0 +1,80 @@
|
|||
FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04
|
||||
|
||||
MAINTAINER leonwanghui <leon.wanghui@huawei.com>
|
||||
|
||||
# Set env
|
||||
ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
|
||||
ENV OMPI_ROOT_PATH /usr/local/openmpi-3.1.5
|
||||
ENV PATH ${OMPI_ROOT_PATH}/bin:/usr/local/bin:$PATH
|
||||
ENV LD_LIBRARY_PATH ${OMPI_ROOT_PATH}/lib:$LD_LIBRARY_PATH
|
||||
|
||||
# Install base tools
|
||||
RUN apt update \
|
||||
&& DEBIAN_FRONTEND=noninteractive apt install -y \
|
||||
vim \
|
||||
wget \
|
||||
curl \
|
||||
xz-utils \
|
||||
net-tools \
|
||||
openssh-client \
|
||||
git \
|
||||
ntpdate \
|
||||
tzdata \
|
||||
tcl \
|
||||
sudo \
|
||||
bash-completion
|
||||
|
||||
# Install compile tools
|
||||
RUN DEBIAN_FRONTEND=noninteractive apt install -y \
|
||||
gcc \
|
||||
g++ \
|
||||
zlibc \
|
||||
make \
|
||||
libgmp-dev \
|
||||
patch \
|
||||
autoconf \
|
||||
libtool \
|
||||
automake \
|
||||
flex \
|
||||
libnccl2=2.4.8-1+cuda10.1 \
|
||||
libnccl-dev=2.4.8-1+cuda10.1
|
||||
|
||||
# Set bash
|
||||
RUN echo "dash dash/sh boolean false" | debconf-set-selections
|
||||
RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash
|
||||
|
||||
# Install python (v3.7.5)
|
||||
RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
|
||||
libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \
|
||||
&& cd /tmp \
|
||||
&& wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \
|
||||
&& tar -xvf v3.7.5.tar.gz \
|
||||
&& cd /tmp/cpython-3.7.5 \
|
||||
&& mkdir -p ${PYTHON_ROOT_PATH} \
|
||||
&& ./configure --prefix=${PYTHON_ROOT_PATH} \
|
||||
&& make -j4 \
|
||||
&& make install -j4 \
|
||||
&& rm -f /usr/local/bin/python \
|
||||
&& rm -f /usr/local/bin/pip \
|
||||
&& ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
|
||||
&& ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
|
||||
&& rm -rf /tmp/cpython-3.7.5 \
|
||||
&& rm -f /tmp/v3.7.5.tar.gz
|
||||
|
||||
# Set pip source
|
||||
RUN mkdir -pv /root/.pip \
|
||||
&& echo "[global]" > /root/.pip/pip.conf \
|
||||
&& echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \
|
||||
&& echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf
|
||||
|
||||
# Install openmpi (v3.1.5)
|
||||
RUN cd /tmp \
|
||||
&& wget https://download.open-mpi.org/release/open-mpi/v3.1/openmpi-3.1.5.tar.gz \
|
||||
&& tar -xvf openmpi-3.1.5.tar.gz \
|
||||
&& cd /tmp/openmpi-3.1.5 \
|
||||
&& mkdir -p ${OMPI_ROOT_PATH} \
|
||||
&& ./configure --prefix=${OMPI_ROOT_PATH} \
|
||||
&& make -j4 \
|
||||
&& make install -j4 \
|
||||
&& rm -rf /tmp/openmpi-3.1.5 \
|
||||
&& rm -f /tmp/openmpi-3.1.5.tar.gz
|
|
@@ -39,6 +39,7 @@ import mindspore.dataset.engine.datasets as de
import mindspore.dataset.transforms.c_transforms as C
from mindspore import context
from mindspore.common.tensor import Tensor
import mindspore.common.dtype as mstype
from mindspore.train.model import Model
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor
from mindspore.model_zoo.Bert_NEZHA import BertNetworkWithLoss, BertTrainOneStepCell

@@ -49,9 +50,9 @@ def create_train_dataset(batch_size):
    """create train dataset"""
    # apply repeat operations
    repeat_count = bert_train_cfg.epoch_size
    ds = de.StorageDataset([bert_train_cfg.DATA_DIR], bert_train_cfg.SCHEMA_DIR,
                           columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels",
                                         "masked_lm_positions", "masked_lm_ids", "masked_lm_weights"])
    ds = de.TFRecordDataset([bert_train_cfg.DATA_DIR], bert_train_cfg.SCHEMA_DIR,
                            columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels",
                                          "masked_lm_positions", "masked_lm_ids", "masked_lm_weights"])
    type_cast_op = C.TypeCast(mstype.int32)
    ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
    ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)

@@ -22,7 +22,6 @@ from PIL import Image
from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
import mindspore.dataset as de
from mindspore.mindrecord import FileWriter
import mindspore.dataset.transforms.vision.py_transforms as P
import mindspore.dataset.transforms.vision.c_transforms as C
from config import ConfigYOLOV3ResNet18

@@ -301,13 +300,12 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=10, device_num
    compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training))

    if is_training:
        hwc_to_chw = P.HWC2CHW()
        hwc_to_chw = C.HWC2CHW()
        ds = ds.map(input_columns=["image", "annotation"],
                    output_columns=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
                    columns_order=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
                    operations=compose_map_func, num_parallel_workers=num_parallel_workers)
        ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=num_parallel_workers)
        ds = ds.shuffle(buffer_size=256)
        ds = ds.batch(batch_size, drop_remainder=True)
        ds = ds.repeat(repeat_num)
    else:

@@ -19,6 +19,7 @@ echo "Please run the scipt as: "
echo "sh run_distribute_train.sh DEVICE_NUM EPOCH_SIZE MINDRECORD_DIR IMAGE_DIR ANNO_PATH MINDSPORE_HCCL_CONFIG_PATH"
echo "for example: sh run_distribute_train.sh 8 100 /data/Mindrecord_train /data /data/train.txt /data/hccl.json"
echo "It is better to use absolute path."
echo "The learning rate is 0.005 as default, if you want other lr, please change the value in this script."
echo "=============================================================================================================="

EPOCH_SIZE=$2

@@ -38,6 +39,11 @@ export RANK_SIZE=$1
for((i=0;i<RANK_SIZE;i++))
do
    export DEVICE_ID=$i

    start=`expr $i \* 12`
    end=`expr $start \+ 11`
    cmdopt=$start"-"$end

    rm -rf LOG$i
    mkdir ./LOG$i
    cp *.py ./LOG$i

@@ -45,8 +51,9 @@ do
    export RANK_ID=$i
    echo "start training for rank $i, device $DEVICE_ID"
    env > env.log
    python ../train.py \
    taskset -c $cmdopt python ../train.py \
    --distribute=1 \
    --lr=0.005 \
    --device_num=$RANK_SIZE \
    --device_id=$DEVICE_ID \
    --mindrecord_dir=$MINDRECORD_DIR \

@@ -67,6 +67,7 @@ if __name__ == '__main__':
    parser.add_argument("--distribute", type=bool, default=False, help="Run distribute, default is false.")
    parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
    parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default is 1.")
    parser.add_argument("--lr", type=float, default=0.001, help="Learning rate, default is 0.001.")
    parser.add_argument("--mode", type=str, default="sink", help="Run sink mode or not, default is sink")
    parser.add_argument("--epoch_size", type=int, default=10, help="Epoch size, default is 10")
    parser.add_argument("--batch_size", type=int, default=32, help="Batch size, default is 32.")

@@ -137,8 +138,8 @@ if __name__ == '__main__':
    ckpt_config = CheckpointConfig(save_checkpoint_steps=dataset_size * args_opt.save_checkpoint_epochs)
    ckpoint_cb = ModelCheckpoint(prefix="yolov3", directory=None, config=ckpt_config)

    lr = Tensor(get_lr(learning_rate=0.001, start_step=0, global_step=args_opt.epoch_size * dataset_size,
                       decay_step=1000, decay_rate=0.95))
    lr = Tensor(get_lr(learning_rate=args_opt.lr, start_step=0, global_step=args_opt.epoch_size * dataset_size,
                       decay_step=1000, decay_rate=0.95, steps=True))
    opt = nn.Adam(filter(lambda x: x.requires_grad, net.get_parameters()), lr, loss_scale=loss_scale)
    net = TrainingWrapper(net, opt, loss_scale)

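For a single-device run the same entry point can be launched directly; a hypothetical example using the new `--lr` option added above, with a placeholder data path and only the flags visible in this change:

```
python train.py --device_id=0 --lr=0.005 --epoch_size=100 --mindrecord_dir=/data/Mindrecord_train
```
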
@@ -1 +1 @@
Subproject commit 71e3e4ac0fd9a1a229f0f07ba273162d27bdbb65
Subproject commit 70bb745b459ff9a0e7fc1008d15fe4b510f03da7

@@ -18,7 +18,7 @@ import sys
import os

def AKGAddPath():
    """akg add path."""
    """_akg add path."""
    pwd = os.path.dirname(os.path.realpath(__file__))
    tvm_path = os.path.realpath(pwd)
    if tvm_path not in sys.path:

@@ -32,12 +32,12 @@ class AKGMetaPathFinder:
    """class AKGMetaPath finder."""

    def find_module(self, fullname, path=None):
        """method akg find module."""
        if fullname.startswith("akg.tvm"):
            rname = fullname[4:]
        """method _akg find module."""
        if fullname.startswith("_akg.tvm"):
            rname = fullname[5:]
            return AKGMetaPathLoader(rname)
        if fullname.startswith("akg.topi"):
            rname = fullname[4:]
        if fullname.startswith("_akg.topi"):
            rname = fullname[5:]
            return AKGMetaPathLoader(rname)
        return None

@@ -26,3 +26,7 @@ from .squeeze_grad import SqueezeGrad, gpu_schedule_SqueezeGrad
from .mean import SimpleMean, gpu_schedule_SimpleMean
from .mean_grad import SimpleMeanGrad, gpu_schedule_SimpleMeanGrad
from .mul import Mul, gpu_schedule_Mul
from .hsigmoid import Hsigmoid, gpu_schedule_Hsigmoid
from .hsigmoid_grad import HsigmoidGrad, gpu_schedule_HsigmoidGrad
from .hswish import Hswish, gpu_schedule_Hswish
from .hswish_grad import HswishGrad, gpu_schedule_HswishGrad

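For reference, the newly registered Hsigmoid/Hswish kernels implement the usual piecewise activations, matching the tvm.compute expressions added further down in this change: hsigmoid(x) = max(0, min(1, (x + 3) / 6)) and hswish(x) = x * hsigmoid(x). Their gradients are 1/6 (hsigmoid) and (2x + 3) / 6 (hswish) inside [-3, 3], and 0 outside, except that hswish's gradient is 1 for x >= 3.
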
@ -14,9 +14,9 @@
|
|||
|
||||
"""cast"""
|
||||
import logging
|
||||
import akg.tvm
|
||||
from akg.ops.math import cast
|
||||
from akg.topi.generic import schedule_elemwise
|
||||
import _akg.tvm
|
||||
from _akg.ops.math import cast
|
||||
from _akg.topi.generic import schedule_elemwise
|
||||
|
||||
def Cast(x, dst_type):
|
||||
"""cast."""
|
||||
|
@ -34,10 +34,10 @@ def gpu_schedule_Cast(outs):
|
|||
sch (schedule.Schedule): The created schedule.
|
||||
"""
|
||||
device = 'cuda'
|
||||
ctx = akg.tvm.context(device, 0)
|
||||
ctx = _akg.tvm.context(device, 0)
|
||||
if not ctx.exist:
|
||||
logging.info("Skip because %s is not enabled", device)
|
||||
return None
|
||||
with akg.tvm.target.create(device):
|
||||
with _akg.tvm.target.create(device):
|
||||
sch = schedule_elemwise(outs)
|
||||
return sch
|
|
@ -15,7 +15,7 @@
|
|||
"""default schedule function for GPU"""
|
||||
from queue import Queue
|
||||
|
||||
import akg.tvm as tvm
|
||||
import _akg.tvm as tvm
|
||||
|
||||
DEFAULT_GPU_THREAD = 1024
|
||||
|
||||
|
@ -31,7 +31,7 @@ def default_schedule(outs):
|
|||
sch (schedule.Schedule): The created schedule.
|
||||
"""
|
||||
if not isinstance(outs, tvm.tensor.Tensor) and not isinstance(outs, list):
|
||||
raise ValueError("outs should be list of akg.tvm.tensor.Tensor or akg.tvm.tensor.Tensor")
|
||||
raise ValueError("outs should be list of _akg.tvm.tensor.Tensor or _akg.tvm.tensor.Tensor")
|
||||
device = 'cuda'
|
||||
ctx = tvm.context(device, 0)
|
||||
if not ctx.exist:
|
|
@ -12,9 +12,9 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""equal"""
|
||||
import akg.tvm
|
||||
from akg.ops.math import equal
|
||||
from akg.topi.generic import schedule_elemwise
|
||||
import _akg.tvm
|
||||
from _akg.ops.math import equal
|
||||
from _akg.topi.generic import schedule_elemwise
|
||||
|
||||
def Equal(x, y):
|
||||
"""equal."""
|
||||
|
@ -32,9 +32,9 @@ def gpu_schedule_Equal(outs):
|
|||
sch (schedule.Schedule): The created schedule.
|
||||
"""
|
||||
device = 'cuda'
|
||||
ctx = akg.tvm.context(device, 0)
|
||||
ctx = _akg.tvm.context(device, 0)
|
||||
if not ctx.exist:
|
||||
raise SystemError("Skip because %s is not enabled" % device)
|
||||
with akg.tvm.target.create(device):
|
||||
with _akg.tvm.target.create(device):
|
||||
sch = schedule_elemwise(outs)
|
||||
return sch
|
|
@ -0,0 +1,63 @@
|
|||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""hsigmoid"""
|
||||
import _akg.topi as topi
|
||||
import _akg.tvm as tvm
|
||||
from _akg.topi import tag
|
||||
|
||||
|
||||
@tvm.tag_scope(tag=tag.ELEMWISE)
|
||||
def topi_nn_hsigmoid(x):
|
||||
"""
|
||||
topi hsigmoid
|
||||
Args:
|
||||
x:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
return tvm.compute(x.shape, lambda *i: tvm.if_then_else(x(*i) <= -3, 0,
|
||||
tvm.if_then_else(x(*i) >= 3, 1,
|
||||
(x(*i) + 3) / 6)))
|
||||
|
||||
|
||||
def Hsigmoid(x):
|
||||
"""
|
||||
Hsigmoid
|
||||
Args:
|
||||
x:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
return topi_nn_hsigmoid(x)
|
||||
|
||||
|
||||
def gpu_schedule_Hsigmoid(outs):
|
||||
"""
|
||||
gpu schedule Hsigmoid
|
||||
Args:
|
||||
outs:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
device = 'cuda'
|
||||
ctx = tvm.context(device, 0)
|
||||
if not ctx.exist:
|
||||
raise SystemError("Skip because %s is not enabled" % device)
|
||||
with tvm.target.create(device):
|
||||
sch = topi.cuda.schedule_elemwise(outs)
|
||||
return sch
|
|
@ -0,0 +1,51 @@
|
|||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Hsigmoid grad"""
|
||||
import _akg.topi as topi
|
||||
import _akg.tvm as tvm
|
||||
|
||||
|
||||
def HsigmoidGrad(y_grad, x):
|
||||
"""
|
||||
HsigmoidGrad
|
||||
Args:
|
||||
y_grad:
|
||||
x:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
return tvm.compute(x.shape, lambda *i: tvm.if_then_else(x(*i) <= -3, 0,
|
||||
tvm.if_then_else(x(*i) >= 3, 0,
|
||||
y_grad(*i) / 6)))
|
||||
|
||||
|
||||
def gpu_schedule_HsigmoidGrad(outs):
|
||||
"""
|
||||
gpu schedule ReLU6Grad
|
||||
Args:
|
||||
outs:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
device = 'cuda'
|
||||
ctx = tvm.context(device, 0)
|
||||
if not ctx.exist:
|
||||
raise SystemError("Skip because %s is not enabled" % device)
|
||||
|
||||
with tvm.target.create(device):
|
||||
sch = topi.cuda.schedule_elemwise(outs)
|
||||
return sch
|
|
@ -0,0 +1,63 @@
|
|||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""hswish"""
|
||||
import _akg.topi as topi
|
||||
import _akg.tvm as tvm
|
||||
from _akg.topi import tag
|
||||
|
||||
|
||||
@tvm.tag_scope(tag=tag.ELEMWISE)
|
||||
def topi_nn_hswish(x):
|
||||
"""
|
||||
topi hswish
|
||||
Args:
|
||||
x:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
return tvm.compute(x.shape, lambda *i: tvm.if_then_else(x(*i) <= -3, 0,
|
||||
tvm.if_then_else(x(*i) >= 3, x(*i),
|
||||
x(*i) * (x(*i) + 3) / 6)))
|
||||
|
||||
|
||||
def Hswish(x):
|
||||
"""
|
||||
Hswish
|
||||
Args:
|
||||
x:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
return topi_nn_hswish(x)
|
||||
|
||||
|
||||
def gpu_schedule_Hswish(outs):
|
||||
"""
|
||||
gpu schedule Hswish
|
||||
Args:
|
||||
outs:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
device = 'cuda'
|
||||
ctx = tvm.context(device, 0)
|
||||
if not ctx.exist:
|
||||
raise SystemError("Skip because %s is not enabled" % device)
|
||||
with tvm.target.create(device):
|
||||
sch = topi.cuda.schedule_elemwise(outs)
|
||||
return sch
|
|
@ -0,0 +1,53 @@
|
|||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""HswishGrad"""
|
||||
import _akg.topi as topi
|
||||
import _akg.tvm as tvm
|
||||
|
||||
|
||||
def HswishGrad(y_grad, x):
|
||||
"""
|
||||
HswishGrad
|
||||
Args:
|
||||
y_grad:
|
||||
x:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
shape = x.shape
|
||||
|
||||
res0 = tvm.compute(shape, lambda *i: tvm.if_then_else(x(*i) <= -3, 0, y_grad(*i) * (2 * x(*i) + 3) / 6))
|
||||
res6 = tvm.compute(shape, lambda *i: tvm.if_then_else(x(*i) >= 3, y_grad(*i), res0(*i)))
|
||||
return res6
|
||||
|
||||
|
||||
def gpu_schedule_HswishGrad(outs):
|
||||
"""
|
||||
gpu schedule HswishGrad
|
||||
Args:
|
||||
outs:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
device = 'cuda'
|
||||
ctx = tvm.context(device, 0)
|
||||
if not ctx.exist:
|
||||
raise SystemError("Skip because %s is not enabled" % device)
|
||||
|
||||
with tvm.target.create(device):
|
||||
sch = topi.cuda.schedule_elemwise(outs)
|
||||
return sch
|
|
@ -13,8 +13,8 @@
|
|||
# limitations under the License.
|
||||
|
||||
"""mean op compute and schedule"""
|
||||
import akg.tvm as tvm
|
||||
from akg.ops.math.mean import mean
|
||||
import _akg.tvm as tvm
|
||||
from _akg.ops.math.mean import mean
|
||||
from .default_schedule import DEFAULT_GPU_THREAD
|
||||
|
||||
def Mean(x, axis=None, keepdims=True):
|
|
@ -13,9 +13,9 @@
|
|||
# limitations under the License.
|
||||
|
||||
"""mean_grad"""
|
||||
import akg.tvm as tvm
|
||||
import akg
|
||||
from akg.ops.math import mean
|
||||
import _akg.tvm as tvm
|
||||
import _akg
|
||||
from _akg.ops.math import mean
|
||||
from .default_schedule import DEFAULT_GPU_THREAD
|
||||
|
||||
|
||||
|
@ -30,7 +30,7 @@ def mean_ad(head, input_shape, axis, keepdims):
|
|||
if tensor_b.op.name == "mean_output":
|
||||
tensor_b = tensor_b.op.input_tensors[0]
|
||||
|
||||
jacs = list(akg.differentiate(tensor_b, [tensor_a], head))
|
||||
jacs = list(_akg.differentiate(tensor_b, [tensor_a], head))
|
||||
return jacs[0]
|
||||
|
||||
|
|
@ -13,9 +13,9 @@
|
|||
# limitations under the License.
|
||||
|
||||
"""mul"""
|
||||
import akg.topi as topi
|
||||
import akg.tvm as tvm
|
||||
from akg.ops.math import mul
|
||||
import _akg.topi as topi
|
||||
import _akg.tvm as tvm
|
||||
from _akg.ops.math import mul
|
||||
|
||||
def Mul(x, y):
|
||||
"""mul."""
|
|
@ -13,9 +13,9 @@
|
|||
# limitations under the License.
|
||||
|
||||
"""relu6"""
|
||||
import akg.topi as topi
|
||||
import akg.tvm as tvm
|
||||
from akg.topi import tag
|
||||
import _akg.topi as topi
|
||||
import _akg.tvm as tvm
|
||||
from _akg.topi import tag
|
||||
|
||||
@tvm.tag_scope(tag=tag.ELEMWISE)
|
||||
def topi_nn_relu6(x):
|
|
@ -13,8 +13,8 @@
|
|||
# limitations under the License.
|
||||
|
||||
"""relu6 grad"""
|
||||
import akg.topi as topi
|
||||
import akg.tvm as tvm
|
||||
import _akg.topi as topi
|
||||
import _akg.tvm as tvm
|
||||
|
||||
def ReLU6Grad(y_grad, x):
|
||||
"""
|
|
@ -13,8 +13,8 @@
|
|||
# limitations under the License.
|
||||
|
||||
"""squeeze"""
|
||||
import akg.topi as topi
|
||||
import akg.tvm as tvm
|
||||
import _akg.topi as topi
|
||||
import _akg.tvm as tvm
|
||||
|
||||
def Squeeze(x, axis=None):
|
||||
"""
|
|
@ -13,7 +13,7 @@
|
|||
# limitations under the License.
|
||||
|
||||
"""squeeze grad"""
|
||||
import akg.topi as topi
|
||||
import _akg.topi as topi
|
||||
|
||||
def SqueezeGrad(y_grad, x_shape, axis=None):
|
||||
"""
|
|
@ -12,9 +12,9 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""tile"""
|
||||
import akg.tvm
|
||||
from akg.ops.array import tile
|
||||
from akg.topi.generic import schedule_elemwise
|
||||
import _akg.tvm
|
||||
from _akg.ops.array import tile
|
||||
from _akg.topi.generic import schedule_elemwise
|
||||
|
||||
def Tile(x, multiples):
|
||||
"""tile."""
|
||||
|
@ -31,9 +31,9 @@ def gpu_schedule_Tile(outs):
|
|||
sch (schedule.Schedule): The created schedule.
|
||||
"""
|
||||
device = 'cuda'
|
||||
ctx = akg.tvm.context(device, 0)
|
||||
ctx = _akg.tvm.context(device, 0)
|
||||
if not ctx.exist:
|
||||
raise SystemError("Skip because %s is not enabled" % device)
|
||||
with akg.tvm.target.create(device):
|
||||
with _akg.tvm.target.create(device):
|
||||
s = schedule_elemwise(outs)
|
||||
return s
|
|
@ -20,9 +20,9 @@ import logging
|
|||
import traceback
|
||||
import os.path
|
||||
from pathlib import Path
|
||||
import akg.tvm
|
||||
from akg.utils import validation_check as vc_util
|
||||
from akg.utils.dsl_create import TensorUtils
|
||||
import _akg.tvm
|
||||
from _akg.utils import validation_check as vc_util
|
||||
from _akg.utils.dsl_create import TensorUtils
|
||||
from . import gpu
|
||||
from . import op_build
|
||||
|
||||
|
@ -67,7 +67,7 @@ def compilewithjson(json_str):
|
|||
tensor_shape = input_desc[0]['shape']
|
||||
tensor_shape = (1,) if not tensor_shape else tensor_shape
|
||||
vc_util.shape_dtype_max_size_check(tensor_shape)
|
||||
args[input_desc[0]['name']] = akg.tvm.placeholder(
|
||||
args[input_desc[0]['name']] = _akg.tvm.placeholder(
|
||||
shape=tensor_shape, name=input_desc[0]['tensor_name'], dtype=input_desc[0]['data_type'])
|
||||
tsr.append(args[input_desc[0]['name']])
|
||||
else:
|
||||
|
@ -76,7 +76,7 @@ def compilewithjson(json_str):
|
|||
tensor_shape = tmp_desc['shape']
|
||||
tensor_shape = (1,) if not tensor_shape else tensor_shape
|
||||
vc_util.shape_dtype_max_size_check(tensor_shape)
|
||||
tmp_input.append(akg.tvm.placeholder(
|
||||
tmp_input.append(_akg.tvm.placeholder(
|
||||
shape=tensor_shape, name=tmp_desc['tensor_name'], dtype=tmp_desc['data_type']))
|
||||
args[input_desc[0]['name']] = tmp_input
|
||||
tsr = tsr + tmp_input
|
|
@ -19,10 +19,10 @@ import types
|
|||
import typing
|
||||
import logging
|
||||
import traceback
|
||||
import akg.tvm
|
||||
import akg
|
||||
from akg import save_gpu_param as gpu_utils
|
||||
from akg.utils import validation_check as vc_util
|
||||
import _akg.tvm
|
||||
import _akg
|
||||
from _akg import save_gpu_param as gpu_utils
|
||||
from _akg.utils import validation_check as vc_util
|
||||
|
||||
MS_CUDA_KERNEL_PATH = "/tmp/cuda_meta/"
|
||||
|
||||
|
@ -38,21 +38,21 @@ def op_build(opnames, computes, args, custom_schedule, device, kernel_name, attr
|
|||
return None
|
||||
|
||||
schedule_name = 'gpu_schedule_' + opnames[0]
|
||||
schedule_func = getattr(akg.gpu, schedule_name)
|
||||
schedule_func = getattr(_akg.gpu, schedule_name)
|
||||
if not isinstance(schedule_func, (types.FunctionType, typing.Callable)):
|
||||
logging.error("no schedule func found %s", str(schedule_name))
|
||||
return None
|
||||
|
||||
ptx_file = os.path.realpath(MS_CUDA_KERNEL_PATH + kernel_name + ".ptx")
|
||||
if os.path.exists(ptx_file):
|
||||
os.remove(ptx_file)
|
||||
os.chmod(ptx_file, 0o600)
|
||||
try:
|
||||
with open(ptx_file, 'at') as file:
|
||||
fcntl.flock(file.fileno(), fcntl.LOCK_EX)
|
||||
file.seek(0, 2)
|
||||
if file.tell() == 0:
|
||||
s = schedule_func(computes)
|
||||
foo = akg.tvm.build(s, args, device, name=kernel_name)
|
||||
foo = _akg.tvm.build(s, args, device, name=kernel_name)
|
||||
ptx_code = foo.imported_modules[0].get_source("ptx")
|
||||
file.write(ptx_code)
|
||||
json_file = os.path.realpath(MS_CUDA_KERNEL_PATH + kernel_name + ".json")
|
|
@ -13,12 +13,12 @@
|
|||
# limitations under the License.
|
||||
|
||||
"""operator dsl function: tile"""
|
||||
import akg.tvm
|
||||
import akg.topi
|
||||
from akg.utils import validation_check as vc_util
|
||||
import _akg.tvm
|
||||
import _akg.topi
|
||||
from _akg.utils import validation_check as vc_util
|
||||
|
||||
|
||||
@vc_util.check_input_type(akg.tvm.tensor.Tensor, (list, tuple))
|
||||
@vc_util.check_input_type(_akg.tvm.tensor.Tensor, (list, tuple))
|
||||
def tile(data, multiples):
|
||||
"""
|
||||
Repeats the data in the specified dimensions according to the multiples.
|
||||
|
@ -32,5 +32,5 @@ def tile(data, multiples):
|
|||
"""
|
||||
vc_util.check_shape(data.shape)
|
||||
vc_util.check_int_list(multiples, "multiples")
|
||||
output = akg.topi.tile(data, multiples)
|
||||
output = _akg.topi.tile(data, multiples)
|
||||
return output
|
|
@ -13,12 +13,12 @@
|
|||
# limitations under the License.
|
||||
|
||||
"""operator dsl function: cast"""
|
||||
import akg.tvm
|
||||
import akg.topi
|
||||
from akg.utils import validation_check as vc_util
|
||||
import _akg.tvm
|
||||
import _akg.topi
|
||||
from _akg.utils import validation_check as vc_util
|
||||
|
||||
|
||||
@vc_util.check_input_type(akg.tvm.tensor.Tensor, str)
|
||||
@vc_util.check_input_type(_akg.tvm.tensor.Tensor, str)
|
||||
def cast(data, dst_type):
|
||||
"""
|
||||
cast data to target type.
|
||||
|
@ -31,6 +31,6 @@ def cast(data, dst_type):
|
|||
tvm.tensor.Tensor, type is dst_type.
|
||||
"""
|
||||
vc_util.check_shape(data.shape)
|
||||
out = akg.topi.cast(data, dst_type)
|
||||
out = _akg.topi.cast(data, dst_type)
|
||||
|
||||
return out
|
|
@ -13,13 +13,13 @@
|
|||
# limitations under the License.
|
||||
|
||||
"""operator dsl function: equal"""
|
||||
import akg.tvm
|
||||
import akg.topi
|
||||
from akg.utils.dsl_create import produce_shapes
|
||||
from akg.utils import validation_check as vc_util
|
||||
import _akg.tvm
|
||||
import _akg.topi
|
||||
from _akg.utils.dsl_create import produce_shapes
|
||||
from _akg.utils import validation_check as vc_util
|
||||
|
||||
|
||||
@vc_util.check_input_type(akg.tvm.tensor.Tensor, akg.tvm.tensor.Tensor)
|
||||
@vc_util.check_input_type(_akg.tvm.tensor.Tensor, _akg.tvm.tensor.Tensor)
|
||||
def equal(input1, input2):
|
||||
"""
|
||||
check whether input1 equals to input2.
|
||||
|
@ -42,13 +42,13 @@ def equal(input1, input2):
|
|||
dtype = input1.dtype
|
||||
|
||||
# get equal compute
|
||||
t_value = akg.tvm.compute(shape, lambda *indice: akg.tvm.const(1, dtype), "T")
|
||||
f_value = akg.tvm.compute(shape, lambda *indice: akg.tvm.const(0, dtype), "F")
|
||||
t_value = _akg.tvm.compute(shape, lambda *indice: _akg.tvm.const(1, dtype), "T")
|
||||
f_value = _akg.tvm.compute(shape, lambda *indice: _akg.tvm.const(0, dtype), "F")
|
||||
|
||||
input1_bro = akg.topi.broadcast_to(input1, shape)
|
||||
input2_bro = akg.topi.broadcast_to(input2, shape)
|
||||
c_out = akg.tvm.compute(shape, lambda *indice: akg.tvm.expr.Select(input1_bro[indice] == input2_bro[indice],
|
||||
t_value[indice], f_value[indice]), name="C")
|
||||
res = akg.tvm.compute(shape, lambda *indice: c_out(*indice).astype("bool"), name="res")
|
||||
input1_bro = _akg.topi.broadcast_to(input1, shape)
|
||||
input2_bro = _akg.topi.broadcast_to(input2, shape)
|
||||
c_out = _akg.tvm.compute(shape, lambda *indice: _akg.tvm.expr.Select(input1_bro[indice] == input2_bro[indice],
|
||||
t_value[indice], f_value[indice]), name="C")
|
||||
res = _akg.tvm.compute(shape, lambda *indice: c_out(*indice).astype("bool"), name="res")
|
||||
|
||||
return res
|
|
@ -13,14 +13,14 @@
|
|||
# limitations under the License.
|
||||
|
||||
"""operator dsl function: mean"""
|
||||
import akg.topi
|
||||
import akg.tvm
|
||||
from akg.utils import format_transform as ft_util
|
||||
from akg.utils import validation_check as vc_util
|
||||
from akg.ops.math import sum
|
||||
import _akg.topi
|
||||
import _akg.tvm
|
||||
from _akg.utils import format_transform as ft_util
|
||||
from _akg.utils import validation_check as vc_util
|
||||
from _akg.ops.math import sum
|
||||
|
||||
|
||||
@vc_util.check_input_type(akg.tvm.tensor.Tensor, (list, tuple, int, type(None)), (bool, type(None)))
|
||||
@vc_util.check_input_type(_akg.tvm.tensor.Tensor, (list, tuple, int, type(None)), (bool, type(None)))
|
||||
def mean(data, axis=None, keepdims=False):
|
||||
"""
|
||||
Computes the mean of the values of a Tensor over the whole dataset.
|
||||
|
@ -42,6 +42,6 @@ def mean(data, axis=None, keepdims=False):
|
|||
for i in axis:
|
||||
count *= shape[i]
|
||||
output, _ = sum.sum_value(data, axis, keepdims)
|
||||
res = akg.topi.divide(output, count)
|
||||
res = _akg.topi.divide(output, count)
|
||||
|
||||
return res
|
|
@ -13,11 +13,11 @@
|
|||
# limitations under the License.
|
||||
|
||||
"""operator dsl function: mul"""
|
||||
import akg.topi
|
||||
from akg.utils import validation_check as vc_util
|
||||
import _akg.topi
|
||||
from _akg.utils import validation_check as vc_util
|
||||
|
||||
|
||||
@vc_util.check_input_type(akg.tvm.tensor.Tensor, akg.tvm.tensor.Tensor)
|
||||
@vc_util.check_input_type(_akg.tvm.tensor.Tensor, _akg.tvm.tensor.Tensor)
|
||||
def mul(l_input, r_input):
|
||||
"""
|
||||
Calculate x * y element-wise.
|
||||
|
@ -38,6 +38,6 @@ def mul(l_input, r_input):
|
|||
vc_util.check_shape(shape2)
|
||||
vc_util.auto_broadcast_check(shape1, shape2)
|
||||
vc_util.elemwise_dtype_check(l_input.dtype, r_input.dtype)
|
||||
output = akg.topi.multiply(l_input, r_input)
|
||||
output = _akg.topi.multiply(l_input, r_input)
|
||||
|
||||
return output
|
|
@ -13,12 +13,12 @@
|
|||
# limitations under the License.
|
||||
|
||||
"""operator dsl function: sub"""
|
||||
import akg.topi
|
||||
import akg.tvm
|
||||
from akg.utils import validation_check as vc_util
|
||||
import _akg.topi
|
||||
import _akg.tvm
|
||||
from _akg.utils import validation_check as vc_util
|
||||
|
||||
|
||||
@vc_util.check_input_type(akg.tvm.tensor.Tensor, akg.tvm.tensor.Tensor)
|
||||
@vc_util.check_input_type(_akg.tvm.tensor.Tensor, _akg.tvm.tensor.Tensor)
|
||||
def sub(data1, data2):
|
||||
"""
|
||||
Computes data1 - data2 elementwise, broadcast is supported.
|
||||
|
@ -35,6 +35,6 @@ def sub(data1, data2):
|
|||
vc_util.check_shape(data2.shape)
|
||||
vc_util.auto_broadcast_check(data1.shape, data2.shape)
|
||||
|
||||
res = akg.topi.subtract(data1, data2)
|
||||
res = _akg.topi.subtract(data1, data2)
|
||||
|
||||
return res
|
|
@ -14,13 +14,13 @@
|
|||
|
||||
"""operator dsl function: sum"""
|
||||
|
||||
import akg.topi
|
||||
import akg.tvm
|
||||
from akg.utils import format_transform as ft_util
|
||||
from akg.utils import validation_check as vc_util
|
||||
import _akg.topi
|
||||
import _akg.tvm
|
||||
from _akg.utils import format_transform as ft_util
|
||||
from _akg.utils import validation_check as vc_util
|
||||
|
||||
|
||||
@vc_util.check_input_type(akg.tvm.tensor.Tensor, (list, tuple, int, type(None)), (bool, type(None)))
|
||||
@vc_util.check_input_type(_akg.tvm.tensor.Tensor, (list, tuple, int, type(None)), (bool, type(None)))
|
||||
def sum_value(inputs, axis=None, keepdims=False):
|
||||
"""
|
||||
Compute the sum of elements across dimensions of a tensor.
|
||||
|
@ -38,8 +38,8 @@ def sum_value(inputs, axis=None, keepdims=False):
|
|||
vc_util.check_shape(inputs.shape)
|
||||
|
||||
if not axis:
|
||||
output = akg.topi.identity(inputs)
|
||||
output = _akg.topi.identity(inputs)
|
||||
else:
|
||||
output = akg.topi.sum(inputs, axis=axis, keepdims=keepdims)
|
||||
output = _akg.topi.sum(inputs, axis=axis, keepdims=keepdims)
|
||||
|
||||
return output
|
|
@ -15,9 +15,9 @@
|
|||
"""save gpu param"""
|
||||
import os
|
||||
import hashlib
|
||||
import akg.tvm
|
||||
from akg.tvm import schedule
|
||||
from akg.utils import validation_check as vc_util
|
||||
import _akg.tvm
|
||||
from _akg.tvm import schedule
|
||||
from _akg.utils import validation_check as vc_util
|
||||
|
||||
|
||||
def get_dim(dim, axis=True):
|
||||
|
@ -66,7 +66,7 @@ def save_gpu_params(s, args, kernel_info):
|
|||
ptx_code = kernel_info[0]
|
||||
file_name = kernel_info[1]
|
||||
kernel_name = kernel_info[2]
|
||||
ir = str(akg.tvm.lower(s, args, simple_mode=True))
|
||||
ir = str(_akg.tvm.lower(s, args, simple_mode=True))
|
||||
file_path = os.path.realpath(file_name)
|
||||
if os.path.exists(file_path):
|
||||
os.remove(file_path)
|
|
@ -13,8 +13,8 @@
|
|||
# limitations under the License.
|
||||
|
||||
"""dsl create helping function"""
|
||||
import akg
|
||||
from akg.utils import format_transform as ft_util
|
||||
import _akg
|
||||
from _akg.utils import format_transform as ft_util
|
||||
|
||||
class TensorUtils:
|
||||
"""Class for creating tensor."""
|
||||
|
@ -33,11 +33,11 @@ class TensorUtils:
|
|||
"""update tensor attrs."""
|
||||
tensor_attrs = cls.get_tensor_attrs(tensor)
|
||||
tensor_attrs.update(attrs)
|
||||
tensor = akg.tvm.compute(tensor.shape,
|
||||
lambda *indice: tensor[indice],
|
||||
name=tensor.op.name,
|
||||
tag=tensor.op.tag,
|
||||
attrs=tensor_attrs)
|
||||
tensor = _akg.tvm.compute(tensor.shape,
|
||||
lambda *indice: tensor[indice],
|
||||
name=tensor.op.name,
|
||||
tag=tensor.op.tag,
|
||||
attrs=tensor_attrs)
|
||||
return tensor
|
||||
|
||||
@classmethod
|
||||
|
@ -61,7 +61,7 @@ class TensorUtils:
|
|||
raise RuntimeError("Shape of the input_tensor and the output_tensor should be equal, "
|
||||
"but got %s and %s"%(input_tensor_shape, output_tensor_shape))
|
||||
output_tensor = cls.update_tensor_attrs(output_tensor, {cls.CREATE_SCH_ONLY: 1})
|
||||
data_buf = akg.tvm.decl_buffer(input_tensor.shape, input_tensor.dtype, name=buffer_name)
|
||||
data_buf = _akg.tvm.decl_buffer(input_tensor.shape, input_tensor.dtype, name=buffer_name)
|
||||
binds_info = {input_tensor: data_buf, output_tensor: data_buf}
|
||||
return output_tensor, binds_info
|
||||
|
|
@ -13,7 +13,7 @@
|
|||
# limitations under the License.
|
||||
|
||||
"""format transform function"""
|
||||
import akg
|
||||
import _akg
|
||||
|
||||
def refine_reduce_axis(input, axis):
|
||||
"""make reduce axis legal."""
|
||||
|
@ -43,11 +43,11 @@ def refine_reduce_axis(input, axis):
|
|||
|
||||
|
||||
def get_shape_from_tensor(data):
|
||||
"""translate akg.tvm.shape to list type in python."""
|
||||
"""translate _akg.tvm.shape to list type in python."""
|
||||
tvm_shape = data.shape
|
||||
py_shape = []
|
||||
for i in tvm_shape:
|
||||
if isinstance(i, akg.tvm.expr.Var):
|
||||
if isinstance(i, _akg.tvm.expr.Var):
|
||||
py_shape.append(i)
|
||||
else:
|
||||
py_shape.append(i.value)
|
||||
|
@ -55,10 +55,10 @@ def get_shape_from_tensor(data):
|
|||
|
||||
|
||||
def tvm_shape_to_list(tvm_shape):
|
||||
"""translate akg.tvm.shape to list type in python."""
|
||||
"""translate _akg.tvm.shape to list type in python."""
|
||||
py_shape = []
|
||||
for i in tvm_shape:
|
||||
if isinstance(i, akg.tvm.expr.Var):
|
||||
if isinstance(i, _akg.tvm.expr.Var):
|
||||
py_shape.append(i)
|
||||
else:
|
||||
py_shape.append(i.value)
|
||||
|
@ -67,9 +67,9 @@ def tvm_shape_to_list(tvm_shape):
|
|||
|
||||
def get_shape(data):
|
||||
"""get shape and save it as list."""
|
||||
if isinstance(data, akg.tvm.tensor.Tensor):
|
||||
if isinstance(data, _akg.tvm.tensor.Tensor):
|
||||
shape = get_shape_from_tensor(data)
|
||||
elif isinstance(data, akg.tvm.container.Array):
|
||||
elif isinstance(data, _akg.tvm.container.Array):
|
||||
shape = tvm_shape_to_list(data)
|
||||
elif isinstance(data, int):
|
||||
shape = [data]
|
|
@ -14,7 +14,7 @@
|
|||
|
||||
"""validation check functions"""
|
||||
from functools import wraps, reduce
|
||||
from akg.utils.format_transform import get_shape
|
||||
from _akg.utils.format_transform import get_shape
|
||||
|
||||
MAX_DATA_SIZE = 2 ** 31
|
||||
|
|
@@ -300,6 +300,13 @@
    for arg, value in args.items():
        ParamValidator.check_subclass(arg, value, mstype.tensor)

    @staticmethod
    def check_bool(arg_name, arg_value):
        """Check arg isintance of bool"""
        if not isinstance(arg_value, bool):
            raise ValueError(f'The `{arg_name}` should be isintance of bool, but got {arg_value}.')
        return arg_value

    @staticmethod
    def check_type(arg_name, arg_value, valid_types):
        """Type checking."""

@@ -125,7 +125,7 @@ def list_len(x):
    return len(x)


# only used in PyNative modes
# only used in PyNative mode
def partial(*args):
    """Implement `partial`."""
    func = args[0].__call__

@@ -133,10 +133,14 @@ def partial(*args):
    return partial_func


# only used in PyNative modes
# only used in PyNative mode
def depend(value, expr):
    return value

# only used in PyNative mode
def make_ref(key, value, ref):
    return value


def scalar_cast(x, t):
    """Implement scalar_cast."""

@@ -32,7 +32,7 @@ def _compiletask(platform, *jsons):

    """
    if platform == "AKG":
        p = __import__("akg", globals(), locals(), ['ms'], 0)
        p = __import__("_akg", globals(), locals(), ['ms'], 0)
        func = getattr(p.ms, "compilewithjson")
        for json_item in jsons:
            res = func(json_item)

@@ -83,9 +83,9 @@ convert_object_map = {
    T.mul: multitype_ops.mul,
    T.truediv: multitype_ops.div,
    T.getitem: multitype_ops.getitem,
    T.floordiv: NO_IMPLEMENT,
    T.mod: F.scalar_mod,
    T.pow: F.scalar_pow,
    T.floordiv: multitype_ops.floordiv,
    T.mod: multitype_ops.mod,
    T.pow: multitype_ops.pow_,
    T.matmul: F.dot,
    T.lshift: NO_IMPLEMENT,
    T.rshift: NO_IMPLEMENT,

@@ -104,8 +104,8 @@ convert_object_map = {
    T.ge: multitype_ops.greater_equal,
    T.is_: F.is_,
    T.is_not: F.is_not,
    T.contains: NO_IMPLEMENT,
    T.not_contains: NO_IMPLEMENT,
    T.contains: F.in_dict,
    T.not_contains: F.not_in_dict,

    # system function
    T.len: M.ms_len,

@@ -5,6 +5,10 @@ if(ENABLE_CPU)
    file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
        "device/cpu/*.cc"
    )
    if (CMAKE_SYSTEM_NAME MATCHES "Windows")
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes -DHAVE_SNPRINTF")
        add_compile_definitions(BUILDING_DLL)
    endif()
endif()

if(ENABLE_GPU)

@@ -150,17 +154,24 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
    "kernel/kash/*.cc"
    "device/gpu/distribution/collective_init.cc"
)

if (ENABLE_CPU)
    list(REMOVE_ITEM MINDSPORE_SRC_LIST "device/gpu/distribution/collective_init.cc")
    if (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
        list(REMOVE_ITEM MINDSPORE_SRC_LIST "kernel/kernel_query.cc")
    endif()
endif()
if (NOT ENABLE_GPU)
    list(APPEND MINDSPORE_SRC_LIST "device/gpu/distribution/collective_fake_init.cc")
endif()
file(GLOB_RECURSE MEM_REUSE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
    "pre_activate/mem_reuse/*.cc"
)
if(NOT ENABLE_DUMP_E2E)
    list(REMOVE_ITEM MINDSPORE_SRC_LIST "debug/e2e_dump.cc")
endif()

file(COPY "${ms_onnx_INC}/onnx/onnx.proto" DESTINATION ${CMAKE_CURRENT_SOURCE_DIR})
file(GLOB_RECURSE ONNX_PROTO RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "${CMAKE_CURRENT_SOURCE_DIR}/onnx.proto")
message(“onnx proto path is : ${ONNX_PROTO}”)
message("onnx proto path is : ${ONNX_PROTO}")
ms_protobuf_generate(ONNX_PROTO_SRCS ONNX_PROTO_HDRS ${ONNX_PROTO})
list(APPEND MINDSPORE_PROTO_LIST ${ONNX_PROTO_SRCS})

@ -249,6 +260,7 @@ file(GLOB_RECURSE MS_GVAR_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
|||
|
||||
add_library(mindspore_gvar SHARED ${MS_GVAR_SRC_LIST})
|
||||
add_library(mindspore STATIC ${MINDSPORE_SRC_LIST})
|
||||
add_dependencies(mindspore GENERATED_OUTPUT_DIR)
|
||||
|
||||
if(ENABLE_D)
|
||||
list(APPEND MINDSPORE_PROTO_LIST ${MINDSPORE_PROTO_AICPU_LIST})
|
||||
|
@ -302,6 +314,7 @@ if(ENABLE_D)
|
|||
set(ASCEND_PATH /usr/local/Ascend)
|
||||
endif()
|
||||
set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common)
|
||||
set(ASCEND_DRIVER_BACK_PATH ${ASCEND_PATH}/driver/lib64/driver)
|
||||
set(ASCEND_RUNTIME_PATH ${ASCEND_PATH}/fwkacllib/lib64)
|
||||
endif()
|
||||
|
||||
|
@ -309,36 +322,52 @@ if(ENABLE_D)
|
|||
find_library(HCCL hccl ${ASCEND_RUNTIME_PATH})
|
||||
find_library(CCE_LIB cce ${ASCEND_RUNTIME_PATH})
|
||||
find_library(RUNTIME_LIB runtime ${ASCEND_RUNTIME_PATH})
|
||||
find_library(TSDCLIENT tsdclient ${ASCEND_RUNTIME_PATH})
|
||||
find_library(TSDCLIENT tsdclient HINTS ${ASCEND_RUNTIME_PATH} ${ASCEND_DRIVER_BACK_PATH})
|
||||
find_library(PROFILING msprof ${ASCEND_DRIVER_PATH})
|
||||
target_link_libraries(mindspore ge_runtime ${CCE_LIB} ${RUNTIME_LIB} ${TSDCLIENT} ${PROFILING} ${HCCL} ${TSDCLIENT})
|
||||
endif()
|
||||
|
||||
target_link_libraries(mindspore securec)
|
||||
target_link_libraries(mindspore dl)
|
||||
if (NOT WIN32)
|
||||
target_link_libraries(mindspore dl)
|
||||
endif()
|
||||
target_link_libraries(mindspore mindspore::flatbuffers)
|
||||
# link protobuf
|
||||
if (ENABLE_D)
|
||||
target_link_libraries(mindspore protobuf::libprotobuf)
|
||||
target_link_libraries(mindspore mindspore::protobuf)
|
||||
endif()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
|
||||
target_link_libraries(mindspore ${PYTHON_LIBRARIES} mindspore_gvar)
|
||||
endif()
|
||||
|
||||
# set c_expression building
|
||||
set(PYTHON_MODULE_SOURCE
|
||||
if (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
|
||||
set(PYTHON_MODULE_SOURCE ${MS_GVAR_SRC_LIST}
|
||||
pipeline/init.cc
|
||||
kernel/oplib/oplib.cc
|
||||
${MS_STEPS_SRC_LIST} ${MS_CCE_SRC_LIST} ${MS_AICPU_SRC_LIST} ${MS_TASKINFO_LIST} ${MS_RT_SRC_LIST}
|
||||
${GPU_NCCL_LIST} ${MS_HCCL_SRC_LIST} ${MS_PREDICT_SRC_LIST} ${CPU_SRC_LIST} ${MEM_REUSE_SRC_LIST} ${GPU_KERNEL_SRC_LIST})
|
||||
${MINDSPORE_SRC_LIST} ${MS_STEPS_SRC_LIST} ${MS_CCE_SRC_LIST} ${MS_AICPU_SRC_LIST} ${MS_TASKINFO_LIST} ${MS_RT_SRC_LIST}
|
||||
${GPU_NCCL_LIST} ${MS_HCCL_SRC_LIST} ${MS_PREDICT_SRC_LIST} ${CPU_SRC_LIST} ${MEM_REUSE_SRC_LIST} ${GPU_KERNEL_SRC_LIST})
|
||||
else()
|
||||
set(PYTHON_MODULE_SOURCE
|
||||
pipeline/init.cc
|
||||
kernel/oplib/oplib.cc
|
||||
${MS_STEPS_SRC_LIST} ${MS_CCE_SRC_LIST} ${MS_AICPU_SRC_LIST} ${MS_TASKINFO_LIST} ${MS_RT_SRC_LIST}
|
||||
${GPU_NCCL_LIST} ${MS_HCCL_SRC_LIST} ${MS_PREDICT_SRC_LIST} ${CPU_SRC_LIST} ${MEM_REUSE_SRC_LIST} ${GPU_KERNEL_SRC_LIST})
|
||||
endif()
|
||||
|
||||
set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
|
||||
pybind11_add_module(_c_expression ${PYTHON_MODULE_SOURCE})
|
||||
target_link_options(_c_expression PRIVATE -Wl,-init,mindspore_log_init)
|
||||
|
||||
MESSAGE(STATUS "operation system is ${CMAKE_SYSTEM}")
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
target_link_options(_c_expression PRIVATE -Wl,-init,mindspore_log_init)
|
||||
set(ORIGIN_PATH $ORIGIN)
|
||||
elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
|
||||
set_target_properties(_c_expression PROPERTIES MACOSX_RPATH ON)
|
||||
set(ORIGIN_PATH @loader_path)
|
||||
elseif (CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||
set(ORIGIN_PATH $ORIGIN)
|
||||
else ()
|
||||
MESSAGE(FATAL_ERROR "other platform: ${CMAKE_SYSTEM_NAME}")
|
||||
endif ()
|
||||
|
@ -346,125 +375,31 @@ endif ()
|
|||
|
||||
set(ORIGIN_PATH ${ORIGIN_PATH}/lib)
|
||||
set_target_properties(_c_expression PROPERTIES INSTALL_RPATH ${ORIGIN_PATH})
|
||||
target_link_libraries(_c_expression PRIVATE
|
||||
mindspore::pybind11_module
|
||||
mindspore
|
||||
mindspore_gvar
|
||||
)
|
||||
if (WIN32)
|
||||
target_link_libraries(_c_expression PRIVATE
|
||||
mindspore::pybind11_module
|
||||
securec
|
||||
proto_input
|
||||
mindspore::flatbuffers
|
||||
)
|
||||
else()
|
||||
target_link_libraries(_c_expression PRIVATE
|
||||
mindspore::pybind11_module
|
||||
mindspore
|
||||
mindspore_gvar
|
||||
)
|
||||
endif()
|
||||
|
||||
if(USE_GLOG)
|
||||
target_link_libraries(_c_expression PRIVATE mindspore::glog)
|
||||
endif()
|
||||
|
||||
if(ENABLE_GPU)
|
||||
execute_process(COMMAND bash ${CMAKE_SOURCE_DIR}/third_party/apply_patches.sh
|
||||
${CMAKE_BINARY_DIR}
|
||||
${dlpack_DIRPATH}
|
||||
${dmlc_core_DIRPATH}
|
||||
${rang_DIRPATH}
|
||||
${incubator_tvm_gpu_DIRPATH})
|
||||
set(TVM_DIR "${CMAKE_BINARY_DIR}/incubator-tvm")
|
||||
# Utility functions
|
||||
include(${TVM_DIR}/cmake/util/Util.cmake)
|
||||
include(${TVM_DIR}/cmake/util/FindCUDA.cmake)
|
||||
|
||||
# include directories
|
||||
include_directories(AFTER "${TVM_DIR}/include")
|
||||
include_directories(AFTER "${TVM_DIR}/src")
|
||||
include_directories(AFTER "${TVM_DIR}")
|
||||
include_directories(AFTER "${TVM_DIR}/src/schedule")
|
||||
|
||||
include_directories(AFTER "${TVM_DIR}/3rdparty/dmlc-core/include")
|
||||
include_directories(AFTER "${TVM_DIR}/3rdparty/dlpack/include")
|
||||
include_directories(AFTER "${TVM_DIR}/3rdparty/compiler-rt")
|
||||
include_directories(AFTER "${TVM_DIR}/3rdparty/rang/include")
|
||||
|
||||
# lib contain dlopen and dlclose
|
||||
set(TVM_RUNTIME_LINKER_LIBS ${CMAKE_DL_LIBS})
|
||||
|
||||
# add source group
|
||||
file(GLOB_RECURSE GROUP_SOURCE "${TVM_DIR}/src/*.cc" "src/*.cc")
|
||||
file(GLOB_RECURSE GROUP_INCLUDE "${TVM_DIR}/src/*.h"
|
||||
"${TVM_DIR}/include/*.h" "src/*.h" "include/*.h")
|
||||
assign_source_group("Source" ${GROUP_SOURCE})
|
||||
assign_source_group("Include" ${GROUP_INCLUDE})
|
||||
|
||||
file(GLOB COMPILER_SRCS
|
||||
"pre_activate/gpu/*.cc"
|
||||
${TVM_DIR}/src/api/*.cc
|
||||
${TVM_DIR}/src/arithmetic/*.cc
|
||||
${TVM_DIR}/src/autotvm/*.cc
|
||||
${TVM_DIR}/src/codegen/*.cc
|
||||
${TVM_DIR}/src/lang/*.cc
|
||||
${TVM_DIR}/src/pass/*.cc
|
||||
${TVM_DIR}/src/op/*.cc
|
||||
${TVM_DIR}/src/node/*.cc
|
||||
${TVM_DIR}/src/schedule/*.cc
|
||||
${TVM_DIR}/src/runtime/*.cc
|
||||
${TVM_DIR}/src/runtime/vm/*.cc
|
||||
${TVM_DIR}/src/runtime/vm/profiler/*.cc
|
||||
${TVM_DIR}/src/codegen/stackvm/*.cc)
|
||||
|
||||
file(GLOB_RECURSE RELAY_SRCS ${TVM_DIR}/src/relay/*.cc)
|
||||
list(APPEND COMPILER_SRCS ${RELAY_SRCS})
|
||||
|
||||
file(GLOB DATATYPE_SRCS ${TVM_DIR}/src/codegen/datatype/*.cc)
|
||||
list(APPEND COMPILER_SRCS ${DATATYPE_SRCS})
|
||||
|
||||
file(GLOB COMPILER_VERILOG_SRCS ${TVM_DIR}/src/codegen/verilog/*.cc)
|
||||
list(APPEND COMPILER_SRCS ${COMPILER_VERILOG_SRCS})
|
||||
|
||||
file(GLOB TOPI_SRCS ${TVM_DIR}/topi/src/*.cc)
|
||||
|
||||
file(GLOB RUNTIME_SRCS
|
||||
${TVM_DIR}/src/runtime/*.cc
|
||||
${TVM_DIR}/src/runtime/vm/*.cc
|
||||
${TVM_DIR}/src/runtime/stub/*.cc
|
||||
${TVM_DIR}/src/runtime/stackvm/*.cc)
|
||||
|
||||
|
||||
file(GLOB COMPILER_OFF_SRCS
|
||||
${TVM_DIR}/src/codegen/opt/build_*_off.cc)
|
||||
set(USE_CUDA "OFF")
|
||||
if(ENABLE_GPU)
|
||||
list(REMOVE_ITEM COMPILER_OFF_SRCS
|
||||
${TVM_DIR}/src/codegen/opt/build_cuda_off.cc)
|
||||
set(USE_CUDA "ON")
|
||||
endif()
|
||||
list(APPEND COMPILER_SRCS ${COMPILER_OFF_SRCS})
|
||||
# Module rules
|
||||
include(${TVM_DIR}/cmake/modules/CUDA.cmake)
|
||||
|
||||
set(CMAKE_C_FLAGS_AKG -pipe -Wall -fPIC -fstack-protector-all)
|
||||
set(CMAKE_C_FLAGS_AKG ${CMAKE_C_FLAGS_AKG} -Wl,-z,relro,-z,now,-z,noexecstack)
|
||||
|
||||
set(CMAKE_CXX_FLAGS_AKG -std=c++11 -pipe -Wall -fPIC -fstack-protector-all)
|
||||
set(CMAKE_CXX_FLAGS_AKG ${CMAKE_CXX_FLAGS_AKG} -Wl,-z,relro,-z,now,-z,noexecstack)
|
||||
|
||||
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
|
||||
message("-- Build in Debug mode")
|
||||
set(CMAKE_C_FLAGS_AKG ${CMAKE_C_FLAGS_AKG} -O0 -g -rdynamic)
|
||||
set(CMAKE_CXX_FLAGS_AKG ${CMAKE_CXX_FLAGS_AKG} -O0 -g -rdynamic)
|
||||
else()
|
||||
message("-- Build in Release mode")
|
||||
set(CMAKE_C_FLAGS_AKG ${CMAKE_C_FLAGS_AKG} -O2 -Werror)
|
||||
set(CMAKE_CXX_FLAGS_AKG ${CMAKE_CXX_FLAGS_AKG} -O2 -Werror)
|
||||
endif()
|
||||
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION
|
||||
VERSION_GREATER 7.0)
|
||||
set(CMAKE_CXX_FLAGS_AKG ${CMAKE_CXX_FLAGS_AKG} -faligned-new)
|
||||
endif()
|
||||
|
||||
add_library(akg OBJECT ${COMPILER_SRCS} ${RUNTIME_SRCS} ${TOPI_SRCS})
|
||||
|
||||
target_link_libraries(akg ${TVM_LINKER_LIBS} ${TVM_RUNTIME_LINKER_LIBS})
|
||||
target_compile_options(akg PRIVATE
|
||||
$<$<COMPILE_LANGUAGE:C>:${CMAKE_C_FLAGS_AKG}>
|
||||
$<$<COMPILE_LANGUAGE:CXX>:${CMAKE_CXX_FLAGS_AKG}>)
|
||||
target_include_directories(akg PRIVATE "${TVM_DIR}/topi/include")
|
||||
|
||||
add_dependencies(_c_expression akg)
|
||||
target_link_libraries(_c_expression PRIVATE akg)
|
||||
target_link_libraries(_c_expression PRIVATE mindspore::tvm)
|
||||
endif()
|
||||
|
||||
if(ENABLE_DUMP_PROTO)
|
||||
target_link_libraries(_c_expression PRIVATE protobuf::libprotobuf)
|
||||
target_link_libraries(_c_expression PRIVATE mindspore::protobuf)
|
||||
endif()
|
||||
|
||||
if(ENABLE_GPU)
|
||||
|
@ -473,6 +408,7 @@ if(ENABLE_GPU)
|
|||
gpu_cuda_lib
|
||||
gpu_queue
|
||||
cublas
|
||||
${CUDA_PATH}/lib64/libcurand.so
|
||||
${CUDNN_PATH}/lib64/libcudnn.so
|
||||
${CUDA_PATH}/lib64/libcudart.so
|
||||
${CUDA_PATH}/lib64/stubs/libcuda.so)
|
||||
|
@ -492,90 +428,3 @@ if(ENABLE_MINDDATA)
|
|||
add_subdirectory(mindrecord)
|
||||
add_subdirectory(dataset)
|
||||
endif()
|
||||
set(MS_PACK_PATH ${CMAKE_SOURCE_DIR}/build/package/mindspore/)
|
||||
set(MS_LIB_PATH ${CMAKE_SOURCE_DIR}/build/package/mindspore/lib/)
|
||||
|
||||
add_custom_target(add_ms_lib ALL
|
||||
COMMAND mkdir -pv ${MS_LIB_PATH}
|
||||
COMMAND cp ${MS_CCSRC_BUILD_PATH}/_c_expression* ${MS_PACK_PATH}
|
||||
COMMAND cp ${MS_CCSRC_BUILD_PATH}/libmindspore_gvar.so ${MS_LIB_PATH}
|
||||
)
|
||||
add_dependencies(add_ms_lib _c_expression)
|
||||
|
||||
if (NOT ENABLE_GE)
|
||||
if (ENABLE_D)
|
||||
if(DEFINED ENV{ASCEND_CUSTOM_PATH})
|
||||
set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH})
|
||||
else()
|
||||
set(ASCEND_PATH /usr/local/Ascend)
|
||||
endif()
|
||||
set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common)
|
||||
add_custom_target(add_ge_lib ALL
|
||||
COMMAND cp ${MS_CCSRC_BUILD_PATH}/../../graphengine/src/common/graph/libgraph.so ${MS_LIB_PATH}
|
||||
COMMAND cp ${MS_CCSRC_BUILD_PATH}/../../graphengine/src/ge/common/libge_common.so ${MS_LIB_PATH}
|
||||
COMMAND cp ${MS_CCSRC_BUILD_PATH}/../../graphengine/src/ge/ge_runtime/libge_runtime.so ${MS_LIB_PATH}
|
||||
COMMAND cp ${ASCEND_DRIVER_PATH}/libslog.so ${MS_LIB_PATH}
|
||||
COMMAND cp ${ASCEND_DRIVER_PATH}/libc_sec.so ${MS_LIB_PATH}
|
||||
)
|
||||
add_dependencies(add_ge_lib add_ms_lib)
|
||||
add_dependencies(add_ge_lib graph)
|
||||
add_dependencies(add_ge_lib ge_runtime)
|
||||
elseif(ENABLE_TESTCASES)
|
||||
add_custom_target(add_ge_lib ALL
|
||||
COMMAND cp ${MS_CCSRC_BUILD_PATH}/../../graphengine/src/common/graph/libgraph.so ${MS_LIB_PATH}
|
||||
COMMAND cp ${CMAKE_SOURCE_DIR}/graphengine/third_party/prebuild/${CMAKE_HOST_SYSTEM_PROCESSOR}/libslog.so ${MS_LIB_PATH}
|
||||
COMMAND cp ${CMAKE_SOURCE_DIR}/graphengine/third_party/prebuild/${CMAKE_HOST_SYSTEM_PROCESSOR}/libc_sec.so ${MS_LIB_PATH}
|
||||
)
|
||||
add_dependencies(add_ge_lib add_ms_lib)
|
||||
add_dependencies(add_ge_lib graph)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (ENABLE_GPU)
|
||||
if (ENABLE_MPI)
|
||||
add_custom_target(add_mpi_lib ALL
|
||||
COMMAND cp ${MS_CCSRC_BUILD_PATH}/_ms_mpi* ${MS_PACK_PATH}
|
||||
)
|
||||
add_dependencies(add_mpi_lib _ms_mpi)
|
||||
add_custom_target(add_gpu_collective_lib ALL
|
||||
COMMAND mkdir -pv ${MS_LIB_PATH}
|
||||
COMMAND cp ${MS_CCSRC_BUILD_PATH}/libgpu_collective* ${MS_LIB_PATH}
|
||||
)
|
||||
add_dependencies(add_gpu_collective_lib gpu_collective)
|
||||
endif()
|
||||
add_custom_target(add_gpu_queue_lib ALL
|
||||
COMMAND cp ${MS_CCSRC_BUILD_PATH}/libgpu_queue* ${MS_LIB_PATH}
|
||||
)
|
||||
add_dependencies(add_gpu_queue_lib add_ms_lib)
|
||||
endif()
|
||||
|
||||
if (ENABLE_CPU)
|
||||
add_custom_target(add_cpu_lib ALL
|
||||
COMMAND cp ${onednn_LIBPATH}/libdnnl.so.1.1 ${MS_LIB_PATH}/libdnnl.so.1
|
||||
)
|
||||
add_dependencies(add_cpu_lib add_ms_lib)
|
||||
endif()
|
||||
|
||||
if (ENABLE_MINDDATA)
|
||||
add_custom_target(add_minddata_lib ALL
|
||||
COMMAND cp ${MS_CCSRC_BUILD_PATH}/dataset/*.so ${MS_PACK_PATH}
|
||||
COMMAND cp ${MS_CCSRC_BUILD_PATH}/mindrecord/*.so ${MS_PACK_PATH}
|
||||
COMMAND cp ${opencv_LIBPATH}/libopencv_core.so.4.2.0 ${MS_LIB_PATH}/libopencv_core.so.4.2
|
||||
COMMAND cp ${opencv_LIBPATH}/libopencv_imgcodecs.so.4.2.0 ${MS_LIB_PATH}/libopencv_imgcodecs.so.4.2
|
||||
COMMAND cp ${opencv_LIBPATH}/libopencv_imgproc.so.4.2.0 ${MS_LIB_PATH}/libopencv_imgproc.so.4.2
|
||||
)
|
||||
add_dependencies(add_minddata_lib add_ms_lib)
|
||||
add_dependencies(add_minddata_lib _c_mindrecord)
|
||||
add_dependencies(add_minddata_lib _c_dataengine)
|
||||
|
||||
add_dependencies(_c_mindrecord mindspore)
|
||||
add_dependencies(_c_dataengine mindspore)
|
||||
endif()
|
||||
|
||||
if (USE_GLOG)
|
||||
target_link_libraries(_c_expression PRIVATE mindspore::glog)
|
||||
add_custom_target(add_glog_lib ALL
|
||||
COMMAND cp ${glog_LIBPATH}/libglog*.so.0 ${MS_LIB_PATH}
|
||||
)
|
||||
add_dependencies(add_glog_lib add_ms_lib)
|
||||
endif()
@ -20,6 +20,8 @@
|
|||
#include <utility>
|
||||
#include "./securec.h"
|
||||
#include "common/utils.h"
|
||||
#include "session/anf_runtime_algorithm.h"
|
||||
#include "kernel/kernel.h"
|
||||
#include "device/convert_tensor_utils.h"
|
||||
#include "utils/convert_utils.h"
|
||||
#include "utils/log_adapter.h"
|
||||
|
@ -27,6 +29,33 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace trans {
|
||||
namespace {
|
||||
std::vector<size_t> PaddingShapeTo4dByDefault(const std::vector<size_t> &shape) {
|
||||
std::vector<size_t> shape_4d(4, 1);
|
||||
switch (shape.size()) {
|
||||
case 0:
|
||||
return shape_4d;
|
||||
case 1:
|
||||
shape_4d[1] = shape[0];
|
||||
break;
|
||||
case 2:
|
||||
shape_4d[1] = shape[0];
|
||||
shape_4d[2] = shape[1];
|
||||
break;
|
||||
case 3:
|
||||
shape_4d[1] = shape[0];
|
||||
shape_4d[2] = shape[1];
|
||||
shape_4d[3] = shape[2];
|
||||
break;
|
||||
case 4:
|
||||
std::copy(shape.begin(), shape.end(), shape_4d.begin());
|
||||
break;
|
||||
default:
|
||||
MS_LOG(EXCEPTION) << "Unexpect shape size = " << shape.size();
|
||||
}
|
||||
return shape_4d;
|
||||
}
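// Illustrative expectations for the default padding above (a sketch that simply traces the switch cases):
//   {}        -> {1, 1, 1, 1}
//   {5}       -> {1, 5, 1, 1}
//   {3, 4}    -> {1, 3, 4, 1}
//   {2, 3, 4} -> {1, 2, 3, 4}
//   rank > 4  -> MS_LOG(EXCEPTION)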
|
||||
} // namespace
|
||||
const size_t kNchwDims = 4;
|
||||
const std::map<TypeId, size_t> type_map = {{kNumberTypeBool, 1}, {kNumberTypeInt, 4}, {kNumberTypeInt8, 1},
|
||||
{kNumberTypeInt16, 2}, {kNumberTypeInt32, 4}, {kNumberTypeInt64, 8},
|
||||
|
@ -154,38 +183,155 @@ size_t TypeIdSize(const TypeId data_type) {
|
|||
return unsupported_type_error;
|
||||
}
|
||||
|
||||
std::vector<size_t> TransShapeTo4d(const std::vector<size_t> &shape) {
|
||||
bool IsNeedPadding(const std::string &format, const size_t shape_size) {
|
||||
if (shape_size == 0) {
|
||||
return false;
|
||||
}
|
||||
if (format == kOpFormat_DEFAULT || format == kOpFormat_FRAC_NZ) {
|
||||
return false;
|
||||
} else if (shape_size < 4) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
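// A minimal usage sketch for IsNeedPadding (the format constants are the ones used elsewhere in this file):
//   IsNeedPadding(kOpFormat_DEFAULT, 2)  -> false  (default and FRAC_NZ keep the host shape as-is)
//   IsNeedPadding(kOpFormat_NC1HWC0, 2)  -> true   (rank < 4 must be padded to 4D first)
//   IsNeedPadding(kOpFormat_NC1HWC0, 4)  -> false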
|
||||
|
||||
std::vector<int> GetRuntimePaddingShape(const AnfNodePtr &node, size_t index) {
|
||||
std::vector<int> shape;
|
||||
std::vector<size_t> host_shape;
|
||||
if (node->isa<ValueNode>()) {
|
||||
auto value_node = node->cast<ValueNodePtr>();
|
||||
auto node_value = value_node->value();
|
||||
auto tensor = node_value->cast<tensor::TensorPtr>();
|
||||
if (tensor == nullptr) {
|
||||
MS_LOG(EXCEPTION) << " the node[ " << node->DebugString() << "]'s cannot convert ";
|
||||
}
|
||||
auto shape_temp = tensor->shape();
|
||||
(void)std::transform(shape_temp.begin(), shape_temp.end(), std::back_inserter(host_shape), IntToSize);
|
||||
if (host_shape.empty()) {
|
||||
host_shape.push_back(1);
|
||||
}
|
||||
} else {
|
||||
host_shape = AnfAlgo::GetOutputInferShape(node, index);
|
||||
}
|
||||
if (trans::IsNeedPadding(AnfAlgo::GetOutputFormat(node, 0), host_shape.size())) {
|
||||
host_shape = trans::PaddingShapeTo4d(host_shape, AnfAlgo::GetOutputReshapeType(node, 0));
|
||||
}
|
||||
std::transform(host_shape.begin(), host_shape.end(), std::back_inserter(shape), SizeToInt);
|
||||
return shape;
|
||||
}
|
||||
|
||||
std::vector<size_t> PaddingShapeTo4d(const std::vector<size_t> &shape, const std::vector<kernel::Axis> &padding_axis) {
|
||||
if (padding_axis.empty() || shape.size() != padding_axis.size()) {
|
||||
return PaddingShapeTo4dByDefault(shape);
|
||||
}
|
||||
std::vector<size_t> shape_4d(4, 1);
|
||||
switch (shape.size()) {
|
||||
case 0:
|
||||
break;
|
||||
case 1:
|
||||
shape_4d[1] = shape[0];
|
||||
break;
|
||||
case 2:
|
||||
shape_4d[0] = shape[0];
|
||||
shape_4d[1] = shape[1];
|
||||
break;
|
||||
case 3:
|
||||
MS_LOG(EXCEPTION) << "Unexpected shape size = 3,it should has a default format";
|
||||
case 4:
|
||||
for (size_t i = 0; i < 4; ++i) {
|
||||
shape_4d[i] = shape[i];
|
||||
}
|
||||
break;
|
||||
default:
|
||||
MS_LOG(EXCEPTION) << "Unexpected shape size = " << shape.size();
|
||||
for (size_t index = 0; index < padding_axis.size(); index++) {
|
||||
shape_4d[padding_axis[index]] = shape[index];
|
||||
}
|
||||
return shape_4d;
|
||||
}
|
||||
|
||||
namespace {
|
||||
bool CheckDims(const std::vector<size_t> &shape) {
|
||||
if (shape.size() != 4) {
|
||||
MS_LOG(ERROR) << "Host shape dims shoud be 4";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<size_t> NchwDeviceShape(const std::vector<size_t> &shape) {
|
||||
if (!CheckDims(shape)) {
|
||||
MS_LOG(EXCEPTION) << "Check dims failed.";
|
||||
}
|
||||
return shape;
|
||||
}
|
||||
|
||||
std::vector<size_t> NhwcDeviceShape(const std::vector<size_t> &shape) {
|
||||
if (!CheckDims(shape)) {
|
||||
MS_LOG(EXCEPTION) << "Ccheck dims failed.";
|
||||
}
|
||||
std::vector<size_t> device_shape;
|
||||
device_shape.push_back(shape[0]);
|
||||
device_shape.push_back(shape[2]);
|
||||
device_shape.push_back(shape[3]);
|
||||
device_shape.push_back(shape[1]);
|
||||
return device_shape;
|
||||
}
|
||||
|
||||
std::vector<size_t> HwchDeviceShape(const std::vector<size_t> &shape) {
|
||||
if (!CheckDims(shape)) {
|
||||
MS_LOG(EXCEPTION) << "Check dims failed.";
|
||||
}
|
||||
std::vector<size_t> device_shape;
|
||||
device_shape.push_back(shape[2]);
|
||||
device_shape.push_back(shape[3]);
|
||||
device_shape.push_back(shape[1]);
|
||||
device_shape.push_back(shape[0]);
|
||||
return device_shape;
|
||||
}
|
||||
|
||||
std::vector<size_t> FracZDeviceShape(const std::vector<size_t> &shape) {
|
||||
if (!CheckDims(shape)) {
|
||||
MS_LOG(EXCEPTION) << "Check dims failed.";
|
||||
}
|
||||
std::vector<size_t> device_shape;
|
||||
size_t cout16 = ((shape[0] + kCubeSize - 1) / kCubeSize) * kCubeSize;
|
||||
size_t cin16 = ((shape[1] + kCubeSize - 1) / kCubeSize) * kCubeSize;
|
||||
device_shape.push_back(shape[2] * shape[3] * cin16 / kCubeSize);
|
||||
device_shape.push_back(cout16 / kCubeSize);
|
||||
device_shape.push_back(kCubeSize);
|
||||
device_shape.push_back(kCubeSize);
|
||||
return device_shape;
|
||||
}
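// Worked example for FracZDeviceShape (a sketch assuming kCubeSize == 16):
//   NCHW host shape {32, 17, 3, 3} gives cout16 = 32 and cin16 = 32, so the device shape is
//   {3 * 3 * 32 / 16, 32 / 16, 16, 16} == {18, 2, 16, 16}.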
|
||||
|
||||
std::vector<size_t> Nc1hwc0DeviceShape(const std::vector<size_t> &shape) {
|
||||
if (!CheckDims(shape)) {
|
||||
MS_LOG(EXCEPTION) << "Check dims failed.";
|
||||
}
|
||||
std::vector<size_t> device_shape;
|
||||
size_t C1 = (shape[1] + kCubeSize - 1) / kCubeSize;
|
||||
size_t C0 = kCubeSize;
|
||||
device_shape.push_back(shape[0]);
|
||||
device_shape.push_back(C1);
|
||||
device_shape.push_back(shape[2]);
|
||||
device_shape.push_back(shape[3]);
|
||||
device_shape.push_back(C0);
|
||||
return device_shape;
|
||||
}
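// Worked example for Nc1hwc0DeviceShape (same assumption, kCubeSize == 16):
//   NCHW host shape {32, 17, 3, 3} gives C1 = (17 + 15) / 16 = 2 and C0 = 16,
//   so the device shape is {32, 2, 3, 3, 16}.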
|
||||
|
||||
std::vector<size_t> C1hwncoc0DeviceShape(const std::vector<size_t> &shape) {
|
||||
if (!CheckDims(shape)) {
|
||||
MS_LOG(EXCEPTION) << "Check dims failed.";
|
||||
}
|
||||
std::vector<size_t> device_shape;
|
||||
device_shape.push_back((shape[1] - 1) / kCubeSize + 1);
|
||||
device_shape.push_back(shape[2]);
|
||||
device_shape.push_back(shape[3]);
|
||||
device_shape.push_back(shape[0]);
|
||||
device_shape.push_back(kCubeSize);
|
||||
device_shape.push_back(kCubeSize);
|
||||
return device_shape;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
std::vector<size_t> TransShapeToDevice(const std::vector<size_t> &shape, const std::string &format) {
|
||||
using DeviceShapeTransfer = std::function<std::vector<size_t>(const std::vector<size_t> &)>;
|
||||
const std::map<std::string, DeviceShapeTransfer> device_shape_map{
|
||||
{kOpFormat_NCHW, NchwDeviceShape}, {kOpFormat_NHWC, NhwcDeviceShape},
|
||||
{kOpFormat_HWCN, HwchDeviceShape}, {kOpFormat_FRAC_Z, FracZDeviceShape},
|
||||
{kOpFormat_NC1HWC0, Nc1hwc0DeviceShape}, {kOpFormat_C1HWNCoC0, C1hwncoc0DeviceShape},
|
||||
};
|
||||
|
||||
if (format == kOpFormat_ND || format == kOpFormat_DEFAULT) {
|
||||
return shape;
|
||||
}
|
||||
auto temp_shape = shape;
|
||||
std::vector<size_t> device_shape;
|
||||
if (format == kOpFormat_FRAC_NZ) {
|
||||
if (shape.size() < 2) {
|
||||
MS_EXCEPTION(NotSupportError) << "Format " << format << " does not support shape size " << shape.size();
|
||||
}
|
||||
if (shape.size() > 2) {
|
||||
MS_LOG(EXCEPTION) << "Format" << format << " is not support shape " << shape.size();
|
||||
} else {
|
||||
(void)std::copy(shape.begin(), shape.end() - 2, std::back_inserter(device_shape));
|
||||
}
|
||||
auto h1 = (shape[shape.size() - 2] - 1) / kCubeSize + 1;
|
||||
|
@ -197,39 +343,34 @@ std::vector<size_t> TransShapeToDevice(const std::vector<size_t> &shape, const s
|
|||
return device_shape;
|
||||
}
|
||||
if (shape.size() != 4) {
|
||||
MS_LOG(EXCEPTION) << "shape_4d size should be 4";
|
||||
MS_LOG(WARNING) << "Get Device Shape using a shape size is less than 4 ,should be Padding shape by Default firstly";
|
||||
temp_shape = PaddingShapeTo4dByDefault(shape);
|
||||
}
|
||||
if (format == kOpFormat_NC1HWC0) {
|
||||
size_t C1 = (shape[1] + kCubeSize - 1) / kCubeSize;
|
||||
size_t C0 = kCubeSize;
|
||||
device_shape.push_back(shape[0]);
|
||||
device_shape.push_back(C1);
|
||||
device_shape.push_back(shape[2]);
|
||||
device_shape.push_back(shape[3]);
|
||||
device_shape.push_back(C0);
|
||||
return device_shape;
|
||||
} else if (format == kOpFormat_FRAC_Z) {
|
||||
size_t cout16 = ((shape[0] + kCubeSize - 1) / kCubeSize) * kCubeSize;
|
||||
size_t cin16 = ((shape[1] + kCubeSize - 1) / kCubeSize) * kCubeSize;
|
||||
device_shape.push_back(shape[2] * shape[3] * cin16 / kCubeSize);
|
||||
device_shape.push_back(cout16 / kCubeSize);
|
||||
device_shape.push_back(kCubeSize);
|
||||
device_shape.push_back(kCubeSize);
|
||||
return device_shape;
|
||||
} else if (format == kOpFormat_NHWC) {
|
||||
device_shape.push_back(shape[0]);
|
||||
device_shape.push_back(shape[2]);
|
||||
device_shape.push_back(shape[3]);
|
||||
device_shape.push_back(shape[1]);
|
||||
return device_shape;
|
||||
} else if (format == kOpFormat_NCHW) {
|
||||
return shape;
|
||||
} else if (format == kOpFormat_HWCN) {
|
||||
return {shape[2], shape[3], shape[1], shape[0]};
|
||||
auto iter = device_shape_map.find(format);
|
||||
if (iter != device_shape_map.end()) {
|
||||
return iter->second(temp_shape);
|
||||
}
|
||||
MS_LOG(EXCEPTION) << "Unexpected format[" << format << "]";
|
||||
}
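// Usage sketch for the table-driven dispatch above (hedged; it only relies on the entries
// registered in device_shape_map and on kCubeSize == 16):
//   TransShapeToDevice({32, 17, 3, 3}, kOpFormat_NC1HWC0) -> {32, 2, 3, 3, 16}
//   TransShapeToDevice({5}, kOpFormat_NC1HWC0)            -> padded to {1, 5, 1, 1}, then {1, 1, 1, 1, 16}
//   TransShapeToDevice(shape, kOpFormat_DEFAULT)          -> shape, returned unchanged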
|
||||
|
||||
bool CheckArgs(const FormatArgs &args, size_t *size, size_t *total_size) {
|
||||
if (args.host_shape.size() != kNchwDims) {
|
||||
MS_LOG(ERROR) << "Invalid host shape, host shape dims:" << args.host_shape.size() << ", expect dims:" << kNchwDims;
|
||||
return false;
|
||||
}
|
||||
*size = TypeIdSize(args.src_data_type);
|
||||
if (*size < 1) {
|
||||
MS_LOG(ERROR) << "Illegal dtype.";
|
||||
return false;
|
||||
}
|
||||
*total_size = ShapeSize(args.device_shape) * (*size);
|
||||
if (*total_size != args.device_size) {
|
||||
MS_LOG(ERROR) << "Illegal total data size, total_size:" << *total_size << ", device_size:" << args.device_size;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TransDataType(const TypeIdArgs &args, void *result) {
|
||||
MS_LOG(DEBUG) << "Begin trans datatype from " << TypeIdLabel(args.host_data_type) << " to "
|
||||
<< TypeIdLabel(args.device_data_type);
|
||||
|
@ -264,13 +405,14 @@ bool TransFormat(const FormatArgs &args, void *result) {
|
|||
MS_LOG(ERROR) << "Invalid datatype..";
|
||||
return false;
|
||||
}
|
||||
if ((args.host_format == kOpFormat_NCHW || args.host_format == kOpFormat_ND) &&
|
||||
args.device_format == kOpFormat_FRAC_Z) {
|
||||
if (args.device_format == kOpFormat_FRAC_Z) {
|
||||
return NchwToFracZ(args, result);
|
||||
} else if (args.device_format == kOpFormat_FRAC_NZ) {
|
||||
return NchwToFracNz(args, result);
|
||||
} else if (args.device_format == kOpFormat_NC1HWC0) {
|
||||
return NchwToNc1hwc0(args, result);
|
||||
} else if (args.device_format == kOpFormat_C1HWNCoC0) {
|
||||
return NchwToC1hwncoc0(args, result);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -281,13 +423,14 @@ bool TransFormatFromDeviceToHost(const FormatArgs &args, void *result) {
|
|||
MS_LOG(ERROR) << "Invalid datatype..";
|
||||
return false;
|
||||
}
|
||||
if ((args.host_format == kOpFormat_NCHW || args.host_format == kOpFormat_ND) &&
|
||||
args.device_format == kOpFormat_FRAC_Z) {
|
||||
if (args.device_format == kOpFormat_FRAC_Z) {
|
||||
return FracZToNchw(args, result);
|
||||
} else if (args.device_format == kOpFormat_FRAC_NZ) {
|
||||
return FracNzToNchw(args, result);
|
||||
} else if (args.device_format == kOpFormat_NC1HWC0) {
|
||||
return Nc1hwc0ToNchw(args, result);
|
||||
} else if (args.device_format == kOpFormat_C1HWNCoC0) {
|
||||
return C1hwncoc0ToNchw(args, result);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -745,5 +888,99 @@ bool Nc1hwc0ToNchw(const FormatArgs &args, void *result) {
|
|||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool NchwToC1hwncoc0(const FormatArgs &args, void *result) {
|
||||
// trans nchw to c1hwncoc0
|
||||
MS_LOG(DEBUG) << "Trans format from nchw to c1hwncoc0.";
|
||||
MS_EXCEPTION_IF_NULL(result);
|
||||
size_t size = 0;
|
||||
size_t total_size = 0;
|
||||
if (!CheckArgs(args, &size, &total_size)) {
|
||||
MS_LOG(ERROR) << "Check args failed.";
|
||||
return false;
|
||||
}
|
||||
auto n = args.host_shape[0];
|
||||
auto c = args.host_shape[1];
|
||||
auto h = args.host_shape[2];
|
||||
auto w = args.host_shape[3];
|
||||
auto c1 = args.device_shape[0];
|
||||
auto co = args.device_shape[4];
|
||||
auto c0 = args.device_shape[5];
|
||||
for (size_t c1_i = 0; c1_i < c1; c1_i++) {
|
||||
for (size_t h_i = 0; h_i < h; h_i++) {
|
||||
for (size_t w_i = 0; w_i < w; w_i++) {
|
||||
for (size_t n_i = 0; n_i < n; n_i++) {
|
||||
for (size_t co_i = 0; co_i < co; co_i++) {
|
||||
for (size_t c0_i = 0; c0_i < c0; c0_i++) {
|
||||
size_t dst_offset = (c1_i * h * w * n * co * c0 + h_i * w * n * co * c0 + w_i * n * co * c0 +
|
||||
n_i * co * c0 + co_i * c0 + c0_i) *
|
||||
size;
|
||||
size_t protected_size = total_size - dst_offset < static_cast<size_t>(SECUREC_MEM_MAX_LEN)
|
||||
? total_size - dst_offset
|
||||
: static_cast<size_t>(SECUREC_MEM_MAX_LEN);
|
||||
size_t c_i = c0_i + c1_i * c0;
|
||||
size_t src_offset = (n_i * c * h * w + c_i * h * w + h_i * w + w_i) * size;
|
||||
errno_t ret;
|
||||
if (c_i < c && c0_i == co_i) {
|
||||
ret = memcpy_s(static_cast<uint8_t *>(result) + dst_offset, protected_size,
|
||||
static_cast<uint8_t const *>(args.data) + src_offset, size);
|
||||
} else {
|
||||
ret = memset_s(static_cast<uint8_t *>(result) + dst_offset, protected_size, 0, size);
|
||||
}
|
||||
if (ret != EOK) {
|
||||
MS_LOG(ERROR) << "Failed to operate the dst memory, error-code:" << ret;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
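// Index-mapping sketch for the copy loop above (kCubeSize assumed to be 16, so co == c0 == 16):
//   host element (n_i, c_i, h_i, w_i) lands at device position
//   (c1_i = c_i / 16, h_i, w_i, n_i, co_i = c_i % 16, c0_i = c_i % 16);
//   every other (co_i, c0_i) pair in the cube, and any channel beyond C, is zero-filled via memset_s.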
|
||||
|
||||
bool C1hwncoc0ToNchw(const FormatArgs &args, void *result) {
|
||||
// trans c1hwncoc0 to nchw
|
||||
MS_LOG(DEBUG) << "Trans format from c1hwncoc0 to nchw";
|
||||
MS_EXCEPTION_IF_NULL(result);
|
||||
size_t size = 0;
|
||||
size_t total_size = 0;
|
||||
if (!CheckArgs(args, &size, &total_size)) {
|
||||
MS_LOG(ERROR) << "Check args failed.";
|
||||
return false;
|
||||
}
|
||||
auto n = args.host_shape[0];
|
||||
auto c = args.host_shape[1];
|
||||
auto h = args.host_shape[2];
|
||||
auto w = args.host_shape[3];
|
||||
auto co = args.device_shape[4];
|
||||
auto c0 = args.device_shape[5];
|
||||
for (size_t n_i = 0; n_i < n; n_i++) {
|
||||
for (size_t c_i = 0; c_i < c; c_i++) {
|
||||
for (size_t h_i = 0; h_i < h; h_i++) {
|
||||
for (size_t w_i = 0; w_i < w; w_i++) {
|
||||
size_t dst_offset = (n_i * c * h * w + c_i * h * w + h_i * w + w_i) * size;
|
||||
size_t c1_i = c_i / kCubeSize;
|
||||
size_t c0_i = c_i % kCubeSize;
|
||||
size_t co_i = c0_i;
|
||||
size_t src_offset = (c1_i * h * w * n * co * c0 + h_i * w * n * co * c0 + w_i * n * co * c0 + n_i * co * c0 +
|
||||
co_i * c0 + c0_i) *
|
||||
size;
|
||||
size_t protected_size = total_size - dst_offset < static_cast<size_t>(SECUREC_MEM_MAX_LEN)
|
||||
? total_size - dst_offset
|
||||
: static_cast<size_t>(SECUREC_MEM_MAX_LEN);
|
||||
auto ret = memcpy_s(static_cast<uint8_t *>(result) + dst_offset, protected_size,
|
||||
static_cast<uint8_t const *>(args.data) + src_offset, size);
|
||||
if (ret != EOK) {
|
||||
MS_LOG(ERROR) << "Failed to operate the dst memory, error-code:" << ret;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
} // namespace trans
|
||||
} // namespace mindspore
@ -24,6 +24,7 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
#include "ir/dtype.h"
|
||||
#include "kernel/kernel.h"
|
||||
#include "ir/dtype/type.h"
|
||||
|
||||
namespace mindspore {
|
||||
|
@ -49,7 +50,10 @@ size_t TypeIdSize(const TypeId data_type);
|
|||
size_t ShapeSize(const std::vector<size_t> &shape);
|
||||
size_t CubeSizeByType(const TypeId data_type);
|
||||
|
||||
std::vector<size_t> TransShapeTo4d(const std::vector<size_t> &shape);
|
||||
std::vector<size_t> PaddingShapeTo4d(const std::vector<size_t> &shape,
|
||||
const std::vector<kernel::Axis> &padding_axis = {});
|
||||
std::vector<int> GetRuntimePaddingShape(const AnfNodePtr &node, size_t index);
|
||||
bool IsNeedPadding(const std::string &format, const size_t shape_size);
|
||||
std::vector<size_t> TransShapeToDevice(const std::vector<size_t> &shape, const std::string &format);
|
||||
bool TransDataType(const TypeIdArgs &args, void *result);
|
||||
bool TransFormat(const FormatArgs &args, void *result);
|
||||
|
@ -59,10 +63,12 @@ bool TransFormatFromDeviceToHost(const FormatArgs &args, void *result);
|
|||
bool NchwToFracZ(const FormatArgs &args, void *result);
|
||||
bool NchwToFracNz(const FormatArgs &args, void *result);
|
||||
bool NchwToNc1hwc0(const FormatArgs &args, void *result);
|
||||
bool NchwToC1hwncoc0(const FormatArgs &args, void *result);
|
||||
// device to host
|
||||
bool FracZToNchw(const FormatArgs &args, void *result);
|
||||
bool FracNzToNchw(const FormatArgs &args, void *result);
|
||||
bool Nc1hwc0ToNchw(const FormatArgs &args, void *result);
|
||||
bool C1hwncoc0ToNchw(const FormatArgs &args, void *result);
|
||||
} // namespace trans
|
||||
} // namespace mindspore
|
@ -3,10 +3,18 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-reorder")
|
|||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-switch")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-sequence-point")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-maybe-uninitialized")
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-uninitialized")
|
||||
else()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-maybe-uninitialized")
|
||||
endif()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes")
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--image-base -Wl,0x10000000")
|
||||
endif()
|
||||
############################# Options ################################
|
||||
if (ENABLE_GPUQUE)
|
||||
add_definitions(-D ENABLE_GPUQUE)
|
||||
|
@ -75,14 +83,19 @@ set_target_properties(_c_dataengine PROPERTIES
|
|||
######################################################################
|
||||
|
||||
################# Link with external libraries ########################
|
||||
target_link_libraries(_c_dataengine PRIVATE mindspore mindspore_gvar)
|
||||
target_link_libraries(_c_dataengine PRIVATE mindspore::pybind11_module -ldl protobuf::libprotobuf ${SECUREC_LIBRARY})
|
||||
if (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
|
||||
target_link_libraries(_c_dataengine PRIVATE mindspore)
|
||||
target_link_libraries(_c_dataengine PRIVATE mindspore::pybind11_module ${PYTHON_LIBRARIES} mindspore::protobuf ${SECUREC_LIBRARY})
|
||||
else()
|
||||
target_link_libraries(_c_dataengine PRIVATE mindspore mindspore_gvar)
|
||||
target_link_libraries(_c_dataengine PRIVATE mindspore::pybind11_module -ldl mindspore::protobuf ${SECUREC_LIBRARY})
|
||||
endif()
|
||||
target_link_libraries(_c_dataengine PUBLIC mindspore::jpeg_turbo mindspore::opencv_core mindspore::opencv_imgcodecs
|
||||
mindspore::opencv_imgproc)
|
||||
if (ENABLE_GPUQUE)
|
||||
target_link_libraries(_c_dataengine PRIVATE gpu_queue
|
||||
${CUDNN_PATH}/lib64/libcudnn.so
|
||||
${CUDA_PATH}/lib64/libcudart.so
|
||||
${CUDA_PATH}/lib64/libcudart.so
|
||||
${CUDA_PATH}/lib64/stubs/libcuda.so)
|
||||
endif ()
|
||||
|
||||
|
@ -91,7 +104,12 @@ if (ENABLE_TDTQUE)
|
|||
endif ()
|
||||
|
||||
add_dependencies(_c_dataengine _c_mindrecord)
|
||||
target_link_libraries(_c_dataengine PRIVATE _c_mindrecord)
|
||||
if (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
|
||||
set(MINDRECORD_LINK_OBJECT ${CMAKE_BINARY_DIR}/mindspore/ccsrc/mindrecord/CMakeFiles/_c_mindrecord.dir/objects.a)
|
||||
target_link_libraries(_c_dataengine PRIVATE _c_mindrecord ${MINDRECORD_LINK_OBJECT} mindspore::sqlite)
|
||||
else()
|
||||
target_link_libraries(_c_dataengine PRIVATE _c_mindrecord)
|
||||
endif()
|
||||
|
||||
if (USE_GLOG)
|
||||
target_link_libraries(_c_dataengine PRIVATE mindspore::glog)
@ -47,12 +47,14 @@ static std::unordered_map<uint32_t, pFunction> g_parse_op_func_ = {{kStorage, &D
|
|||
{kMap, &DEPipeline::ParseMapOp},
|
||||
{kBatch, &DEPipeline::ParseBatchOp},
|
||||
{kRepeat, &DEPipeline::ParseRepeatOp},
|
||||
{kSkip, &DEPipeline::ParseSkipOp},
|
||||
{kZip, &DEPipeline::ParseZipOp},
|
||||
{kRename, &DEPipeline::ParseRenameOp},
|
||||
{kDeviceQueue, &DEPipeline::ParseDeviceQueueOp},
|
||||
{kGenerator, &DEPipeline::ParseGeneratorOp},
|
||||
{kTfReader, &DEPipeline::ParseTFReaderOp},
|
||||
{kProject, &DEPipeline::ParseProjectOp},
|
||||
{kTake, &DEPipeline::ParseTakeOp},
|
||||
{kImageFolder, &DEPipeline::ParseImageFolderOp},
|
||||
{kMnist, &DEPipeline::ParseMnistOp},
|
||||
{kManifest, &DEPipeline::ParseManifestOp},
|
||||
|
@ -422,6 +424,11 @@ Status DEPipeline::ParseMindRecordOp(const py::dict &args, std::shared_ptr<Datas
|
|||
} else if (key == "global_shuffle" && ToBool(value) == true) {
|
||||
uint32_t seed = args["partitions"].is_none() ? GetSeed() : 0;
|
||||
operators.push_back(std::make_shared<mindrecord::ShardShuffle>(seed));
|
||||
} else if (key == "sampler") {
|
||||
auto create = py::reinterpret_borrow<py::object>(value).attr("_create_for_minddataset");
|
||||
std::shared_ptr<mindrecord::ShardOperator> sample_op =
|
||||
create().cast<std::shared_ptr<mindrecord::ShardOperator>>();
|
||||
operators.push_back(sample_op);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -506,13 +513,24 @@ Status DEPipeline::ParseRepeatOp(const py::dict &args, std::shared_ptr<DatasetOp
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
Status DEPipeline::ParseSkipOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
|
||||
if (args["count"].is_none()) {
|
||||
std::string err_msg = "Error: count is invalid or not set.";
|
||||
RETURN_STATUS_UNEXPECTED(err_msg);
|
||||
}
|
||||
std::shared_ptr<SkipOp> op;
|
||||
RETURN_IF_NOT_OK(SkipOp::Builder(ToInt(args["count"])).Build(&op));
|
||||
*ptr = op;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status DEPipeline::ParseGeneratorOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
|
||||
std::shared_ptr<GeneratorOp::Builder> builder = std::make_shared<GeneratorOp::Builder>();
|
||||
for (auto arg : args) {
|
||||
std::string key = py::str(arg.first);
|
||||
py::handle value = arg.second;
|
||||
if (!value.is_none()) {
|
||||
if (key == "generator_function") {
|
||||
if (key == "source") {
|
||||
py::object obj = py::cast(&value);
|
||||
if (!py::isinstance<py::function>(obj)) {
|
||||
std::string err_msg = "Error: generator is invalid or not set.";
|
||||
|
@ -633,7 +651,16 @@ Status DEPipeline::ParseRenameOp(const py::dict &args, std::shared_ptr<DatasetOp
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
DsOpPtr DEPipeline::ParseTakeOp(const py::dict &args) const { return DsOpPtr(); }
|
||||
Status DEPipeline::ParseTakeOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
|
||||
if (args["count"].is_none()) {
|
||||
std::string err_msg = "Error: count is invalid or not set.";
|
||||
RETURN_STATUS_UNEXPECTED(err_msg);
|
||||
}
|
||||
std::shared_ptr<TakeOp> op;
|
||||
RETURN_IF_NOT_OK(TakeOp::Builder(ToInt(args["count"])).Build(&op));
|
||||
*ptr = op;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status DEPipeline::ParseZipOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
|
||||
std::shared_ptr<ZipOp::Builder> builder = std::make_shared<ZipOp::Builder>();
|
||||
|
|
|
@ -42,6 +42,7 @@ enum OpName {
|
|||
kBatch,
|
||||
kCache,
|
||||
kRepeat,
|
||||
kSkip,
|
||||
kTake,
|
||||
kZip,
|
||||
kMap,
|
||||
|
@ -107,13 +108,15 @@ class DEPipeline {
|
|||
|
||||
Status ParseRepeatOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
|
||||
|
||||
Status ParseSkipOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
|
||||
|
||||
Status ParseBatchOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
|
||||
|
||||
Status ParseGeneratorOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
|
||||
|
||||
Status ParseRenameOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
|
||||
|
||||
DsOpPtr ParseTakeOp(const py::dict &args) const;
|
||||
Status ParseTakeOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
|
||||
|
||||
Status ParseZipOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
|
||||
|
||||
|
|
|
@ -19,7 +19,9 @@
|
|||
#include "dataset/kernels/no_op.h"
|
||||
#include "dataset/kernels/data/one_hot_op.h"
|
||||
#include "dataset/kernels/image/center_crop_op.h"
|
||||
#if !defined(_WIN32) && !defined(_WIN64)
|
||||
#include "dataset/kernels/image/change_mode_op.h"
|
||||
#endif
|
||||
#include "dataset/kernels/image/cut_out_op.h"
|
||||
#include "dataset/kernels/image/decode_op.h"
|
||||
#include "dataset/kernels/image/distort_bounding_box_crop_op.h"
|
||||
|
@ -54,6 +56,9 @@
|
|||
#include "dataset/engine/datasetops/source/tf_reader_op.h"
|
||||
#include "dataset/engine/jagged_connector.h"
|
||||
#include "dataset/kernels/data/to_float16_op.h"
|
||||
#include "dataset/util/random.h"
|
||||
#include "mindrecord/include/shard_operator.h"
|
||||
#include "mindrecord/include/shard_sample.h"
|
||||
#include "pybind11/pybind11.h"
|
||||
#include "pybind11/stl.h"
|
||||
#include "pybind11/stl_bind.h"
|
||||
|
@ -220,11 +225,13 @@ void bindTensor(py::module *m) {
|
|||
(void)py::class_<DataType>(*m, "DataType")
|
||||
.def(py::init<std::string>())
|
||||
.def(py::self == py::self)
|
||||
.def("__str__", &DataType::ToString);
|
||||
.def("__str__", &DataType::ToString)
|
||||
.def("__deepcopy__", [](py::object &t, py::dict memo) { return t; });
|
||||
}
|
||||
|
||||
void bindTensorOps1(py::module *m) {
|
||||
(void)py::class_<TensorOp, std::shared_ptr<TensorOp>>(*m, "TensorOp");
|
||||
(void)py::class_<TensorOp, std::shared_ptr<TensorOp>>(*m, "TensorOp")
|
||||
.def("__deepcopy__", [](py::object &t, py::dict memo) { return t; });
|
||||
|
||||
(void)py::class_<NormalizeOp, TensorOp, std::shared_ptr<NormalizeOp>>(
|
||||
*m, "NormalizeOp", "Tensor operation to normalize an image. Takes mean and std.")
|
||||
|
@ -276,9 +283,11 @@ void bindTensorOps2(py::module *m) {
|
|||
py::arg("fillG") = RandomCropOp::kDefFillG, py::arg("fillB") = RandomCropOp::kDefFillB);
|
||||
(void)py::class_<HwcToChwOp, TensorOp, std::shared_ptr<HwcToChwOp>>(*m, "ChannelSwapOp").def(py::init<>());
|
||||
|
||||
#if !defined(_WIN32) && !defined(_WIN64)
|
||||
(void)py::class_<ChangeModeOp, TensorOp, std::shared_ptr<ChangeModeOp>>(
|
||||
*m, "ChangeModeOp", "Tensor operation to change colors from BGR to RGB")
|
||||
.def(py::init<>());
|
||||
#endif
|
||||
|
||||
(void)py::class_<OneHotOp, TensorOp, std::shared_ptr<OneHotOp>>(
|
||||
*m, "OneHotOp", "Tensor operation to apply one hot encoding. Takes number of classes.")
|
||||
|
@ -381,7 +390,17 @@ void bindTensorOps4(py::module *m) {
|
|||
}
|
||||
|
||||
void bindSamplerOps(py::module *m) {
|
||||
(void)py::class_<Sampler, std::shared_ptr<Sampler>>(*m, "Sampler");
|
||||
(void)py::class_<Sampler, std::shared_ptr<Sampler>>(*m, "Sampler")
|
||||
.def("set_num_rows", [](Sampler &self, int64_t rows) { THROW_IF_ERROR(self.SetNumRowsInDataset(rows)); })
|
||||
.def("set_num_samples", [](Sampler &self, int64_t samples) { THROW_IF_ERROR(self.SetNumSamples(samples)); })
|
||||
.def("initialize", [](Sampler &self) { THROW_IF_ERROR(self.InitSampler()); })
|
||||
.def("get_indices", [](Sampler &self) {
|
||||
py::array ret;
|
||||
THROW_IF_ERROR(self.GetAllIdsThenReset(&ret));
|
||||
return ret;
|
||||
});
|
||||
|
||||
(void)py::class_<mindrecord::ShardOperator, std::shared_ptr<mindrecord::ShardOperator>>(*m, "ShardOperator");
|
||||
|
||||
(void)py::class_<DistributedSampler, Sampler, std::shared_ptr<DistributedSampler>>(*m, "DistributedSampler")
|
||||
.def(py::init<int64_t, int64_t, bool, uint32_t>(), py::arg("numDev"), py::arg("devId"), py::arg("shuffle"),
|
||||
|
@ -399,6 +418,10 @@ void bindSamplerOps(py::module *m) {
|
|||
(void)py::class_<SubsetRandomSampler, Sampler, std::shared_ptr<SubsetRandomSampler>>(*m, "SubsetRandomSampler")
|
||||
.def(py::init<std::vector<int64_t>>(), py::arg("indices"));
|
||||
|
||||
(void)py::class_<mindrecord::ShardSample, mindrecord::ShardOperator, std::shared_ptr<mindrecord::ShardSample>>(
|
||||
*m, "MindrecordSubsetRandomSampler")
|
||||
.def(py::init<std::vector<int64_t>, uint32_t>(), py::arg("indices"), py::arg("seed") = GetSeed());
|
||||
|
||||
(void)py::class_<WeightedRandomSampler, Sampler, std::shared_ptr<WeightedRandomSampler>>(*m, "WeightedRandomSampler")
|
||||
.def(py::init<std::vector<double>, int64_t, bool>(), py::arg("weights"), py::arg("numSamples"),
|
||||
py::arg("replacement"));
|
||||
|
@ -406,7 +429,7 @@ void bindSamplerOps(py::module *m) {
|
|||
|
||||
void bindInfoObjects(py::module *m) {
|
||||
(void)py::class_<BatchOp::CBatchInfo>(*m, "CBatchInfo")
|
||||
.def(py::init<int32_t, int32_t, int32_t>())
|
||||
.def(py::init<int64_t, int64_t, int64_t>())
|
||||
.def("get_epoch_num", &BatchOp::CBatchInfo::get_epoch_num)
|
||||
.def("get_batch_num", &BatchOp::CBatchInfo::get_batch_num);
|
||||
}
|
||||
|
@ -423,6 +446,7 @@ PYBIND11_MODULE(_c_dataengine, m) {
|
|||
.value("MINDRECORD", OpName::kMindrecord)
|
||||
.value("CACHE", OpName::kCache)
|
||||
.value("REPEAT", OpName::kRepeat)
|
||||
.value("SKIP", OpName::kSkip)
|
||||
.value("TAKE", OpName::kTake)
|
||||
.value("ZIP", OpName::kZip)
|
||||
.value("MAP", OpName::kMap)
|
||||
|
|
|
@ -32,11 +32,13 @@
|
|||
#include "dataset/engine/datasetops/project_op.h"
|
||||
#include "dataset/engine/datasetops/rename_op.h"
|
||||
#include "dataset/engine/datasetops/repeat_op.h"
|
||||
#include "dataset/engine/datasetops/skip_op.h"
|
||||
#include "dataset/engine/datasetops/shuffle_op.h"
|
||||
#include "dataset/engine/datasetops/source/generator_op.h"
|
||||
#include "dataset/engine/datasetops/source/mindrecord_op.h"
|
||||
#include "dataset/engine/datasetops/source/storage_op.h"
|
||||
#include "dataset/engine/datasetops/source/tf_reader_op.h"
|
||||
#include "dataset/engine/datasetops/take_op.h"
|
||||
#include "dataset/engine/datasetops/zip_op.h"
|
||||
#include "dataset/engine/execution_tree.h"
|
||||
#include "dataset/util/status.h"
|
||||
|
|
|
@ -85,6 +85,7 @@ Tensor &Tensor::operator=(Tensor &&other) noexcept {
|
|||
shape_ = other.shape();
|
||||
type_ = other.type();
|
||||
data_ = other.StartAddr();
|
||||
data_end_ = other.data_end_;
|
||||
data_allocator_ = std::move(other.data_allocator_);
|
||||
other.Invalidate();
|
||||
}
|
||||
|
@ -208,11 +209,13 @@ Tensor::~Tensor() {
|
|||
if (data_allocator_ != nullptr) {
|
||||
data_allocator_->deallocate(data_);
|
||||
data_ = nullptr;
|
||||
data_end_ = nullptr;
|
||||
} else {
|
||||
// If we didn't have an allocator, but data_ is not null then it must
|
||||
// be a stand-alone tensor that used malloc directly.
|
||||
free(data_);
|
||||
data_ = nullptr;
|
||||
data_end_ = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -338,8 +341,10 @@ unsigned char *Tensor::StartAddr() {
|
|||
// on the shape and type and allocate it.
|
||||
if (data_allocator_ != nullptr) {
|
||||
data_ = data_allocator_->allocate(this->SizeInBytes());
|
||||
data_end_ = data_ + SizeInBytes();
|
||||
} else {
|
||||
data_ = static_cast<unsigned char *>(malloc(this->SizeInBytes()));
|
||||
data_end_ = data_ + SizeInBytes();
|
||||
if (data_ == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -362,6 +367,7 @@ void Tensor::Invalidate() {
|
|||
shape_ = TensorShape::CreateUnknownRankShape();
|
||||
type_ = DataType(DataType::DE_UNKNOWN);
|
||||
data_ = nullptr;
|
||||
data_end_ = nullptr;
|
||||
data_allocator_ = nullptr;
|
||||
}
|
||||
|
||||
|
@ -491,6 +497,8 @@ Status Tensor::GetItemAt(T *o, const std::vector<dsize_t> &index) const {
|
|||
|
||||
// return data as numpy, should return status
|
||||
Status Tensor::GetDataAsNumpy(py::array *data) {
|
||||
RETURN_UNEXPECTED_IF_NULL(data_);
|
||||
RETURN_UNEXPECTED_IF_NULL(data);
|
||||
if (type_ == DataType::DE_BOOL) {
|
||||
*data = py::array_t<bool>(shape_.AsVector(), reinterpret_cast<bool *>(data_));
|
||||
} else if (type_ == DataType::DE_INT8) {
|
||||
|
|
|
@ -22,6 +22,10 @@
|
|||
#include <vector>
|
||||
#include "./securec.h"
|
||||
#include "utils/log_adapter.h"
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#undef HAVE_STDDEF_H
|
||||
#undef HAVE_STDLIB_H
|
||||
#endif
|
||||
#include "pybind11/numpy.h"
|
||||
#include "pybind11/pybind11.h"
|
||||
#include "pybind11/stl.h"
|
||||
|
@ -359,7 +363,7 @@ class Tensor {
|
|||
// @return TensorIterator
|
||||
template <typename T>
|
||||
TensorIterator<T> end() {
|
||||
return TensorIterator<T>(data_ + SizeInBytes());
|
||||
return TensorIterator<T>(data_end_);
|
||||
}
|
||||
|
||||
protected:
|
||||
|
@ -398,6 +402,8 @@ class Tensor {
|
|||
unsigned char *data_;
|
||||
// An allocator for data_
|
||||
CharAllocPtr data_allocator_;
|
||||
// pointer to the end of the physical data
|
||||
unsigned char *data_end_ = nullptr;
|
||||
};
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -5,12 +5,13 @@ add_library(engine-datasetops OBJECT
|
|||
parallel_op.cc
|
||||
pipeline_op.cc
|
||||
batch_op.cc
|
||||
batch_op.cc
|
||||
device_queue_op.cc
|
||||
map_op.cc
|
||||
project_op.cc
|
||||
rename_op.cc
|
||||
repeat_op.cc
|
||||
skip_op.cc
|
||||
take_op.cc
|
||||
shuffle_op.cc
|
||||
zip_op.cc
|
||||
)
|
||||
|
|
|
@ -57,7 +57,7 @@ BatchOp::BatchOp(int32_t batch_size, bool drop, int32_t op_queue_size, int32_t n
|
|||
Status BatchOp::operator()() {
|
||||
RETURN_IF_NOT_OK(LaunchThreadsAndInitOp());
|
||||
TaskManager::FindMe()->Post();
|
||||
int32_t epoch_num = 0, batch_num = 0, cnt = 0;
|
||||
int64_t epoch_num = 0, batch_num = 0, cnt = 0;
|
||||
TensorRow new_row;
|
||||
std::unique_ptr<TensorQTable> table = std::make_unique<TensorQTable>();
|
||||
child_iterator_ = std::make_unique<ChildIterator>(this, 0, 0);
|
||||
|
|
|
@ -124,17 +124,17 @@ class BatchOp : public ParallelOp {
|
|||
// This struct is used for both internal control and python callback.
|
||||
// This struct is bound to python with read-only access.
|
||||
struct CBatchInfo {
|
||||
CBatchInfo(int32_t ep, int32_t bat, int32_t cur, batchCtrl ctrl)
|
||||
CBatchInfo(int64_t ep, int64_t bat, int64_t cur, batchCtrl ctrl)
|
||||
: epoch_num_(ep), batch_num_(bat), total_batch_num_(cur), ctrl_(ctrl) {}
|
||||
CBatchInfo(int32_t ep, int32_t bat, int32_t cur) : CBatchInfo(ep, bat, cur, batchCtrl::kNoCtrl) {}
|
||||
CBatchInfo(int64_t ep, int64_t bat, int64_t cur) : CBatchInfo(ep, bat, cur, batchCtrl::kNoCtrl) {}
|
||||
CBatchInfo() : CBatchInfo(0, 0, 0, batchCtrl::kNoCtrl) {}
|
||||
explicit CBatchInfo(batchCtrl ctrl) : CBatchInfo(0, 0, 0, ctrl) {}
|
||||
int32_t epoch_num_; // i-th epoch. i starts from 0
|
||||
int32_t batch_num_; // i-th batch since the start of current epoch. i starts from 0
|
||||
int32_t total_batch_num_; // i-th batch since the start of first epoch. i starts from 0
|
||||
int64_t epoch_num_; // i-th epoch. i starts from 0
|
||||
int64_t batch_num_; // i-th batch since the start of current epoch. i starts from 0
|
||||
int64_t total_batch_num_; // i-th batch since the start of first epoch. i starts from 0
|
||||
batchCtrl ctrl_; // No control=0, EOE=1, EOF=2, Quit=3
|
||||
const int32_t get_batch_num() const { return batch_num_; }
|
||||
const int32_t get_epoch_num() const { return epoch_num_; }
|
||||
const int64_t get_batch_num() const { return batch_num_; }
|
||||
const int64_t get_epoch_num() const { return epoch_num_; }
|
||||
};
|
||||
|
||||
// BatchOp constructor
|
||||
|
|
|
@ -109,11 +109,15 @@ void DatasetOp::Print(std::ostream &out, bool show_all) const {
|
|||
|
||||
// Gets the next buffer from the given child
|
||||
Status DatasetOp::GetNextBuffer(std::unique_ptr<DataBuffer> *p_buffer, int32_t worker_id, bool retry_if_eoe) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
RETURN_IF_NOT_OK(out_connector_->PopWithRetry(static_cast<int>(worker_id), p_buffer, retry_if_eoe));
|
||||
#else
|
||||
std::unique_ptr<DataBuffer> next_buff;
|
||||
// pop is a blocking call and will throw an interruption if the whole group shuts down.
|
||||
RETURN_IF_NOT_OK(out_connector_->PopWithRetry(static_cast<int>(worker_id), &next_buff, retry_if_eoe));
|
||||
|
||||
*p_buffer = std::move(next_buff);
|
||||
#endif
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -161,15 +165,18 @@ Status DatasetOp::EofReceived(int32_t worker_id) {
|
|||
return (out_connector_->Add(static_cast<int>(worker_id), std::move(eof_buffer)));
|
||||
}
|
||||
|
||||
// During tree prepare phase, operators may have specific operations to perform depending on
|
||||
// During tree prepare phase, operators may have specific pre-operations to perform depending on
|
||||
// their role.
|
||||
Status DatasetOp::PrepareNodeAction() {
|
||||
Status DatasetOp::PrepareNodePreAction() {
|
||||
if (BitTest(tree_->PrepareFlags(), ExecutionTree::kDePrepRepeat)) set_control_flag(kDeOpRepeated);
|
||||
return Status::OK();
|
||||
}
|
||||
// During tree prepare phase, operators may have specific post-operations to perform depending on
|
||||
// their role.
|
||||
Status DatasetOp::PrepareNodePostAction() {
|
||||
// If this op does not have any children and it is in a repeat path of the tree...
|
||||
if (child_.size() == 0 && BitTest(tree_->PrepareFlags(), ExecutionTree::kDePrepRepeat)) {
|
||||
// Then, flag this operator as a leaf node in a repeat path of tree execution.
|
||||
BitSet(&op_ctrl_flags_, kDeOpRepeated);
|
||||
|
||||
// Secondly, push ourselves onto the tree repeat stack. Later, the repeat operator
|
||||
if (child_.empty() && BitTest(op_ctrl_flags_, kDeOpRepeated)) {
|
||||
// push ourselves onto the tree repeat stack. Later, the repeat operator
|
||||
// above us will consume them.
|
||||
tree_->AddToRepeatStack(shared_from_this());
|
||||
}
|
||||
|
|
|
@ -150,11 +150,17 @@ class DatasetOp : public std::enable_shared_from_this<DatasetOp> {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
// During tree prepare phase, operators may have specific operations to perform depending on
|
||||
// During tree prepare phase, operators may have specific pre-operations to perform depending on
|
||||
// their role.
|
||||
// @notes Derived versions of this function should always call its superclass version first
|
||||
// before providing their own implementations.
|
||||
virtual Status PrepareNodeAction();
|
||||
virtual Status PrepareNodePreAction();
|
||||
|
||||
// During tree prepare phase, operators may have specific post-operations to perform depending on
|
||||
// their role.
|
||||
// @notes Derived versions of this function should always call its superclass version first
|
||||
// before providing their own implementations.
|
||||
virtual Status PrepareNodePostAction();
|
||||
|
||||
// Getter function
|
||||
// @return The operator id
|
||||
|
|
|
@ -65,6 +65,9 @@ MapOp::MapOp(const std::vector<std::string> &in_col_names, const std::vector<std
|
|||
tfuncs_(std::move(tensor_funcs)),
|
||||
in_columns_(in_col_names),
|
||||
out_columns_(out_col_names),
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
eof_worker_id_(0),
|
||||
#endif
|
||||
perf_mode_(perf_mode) {
|
||||
// If caller didn't specify the out_col_names, assume they are same as the in_columns.
|
||||
if (out_columns_.empty() || out_columns_[0].empty()) {
|
||||
|
@ -120,6 +123,17 @@ Status MapOp::operator()() {
|
|||
RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buff, 0));
|
||||
is_eof = buff->eof();
|
||||
RETURN_IF_NOT_OK(local_queues_[que_id]->Add(std::move(buff)));
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
if (is_eof) {
|
||||
eof_worker_id_ = que_id;
|
||||
for (int32_t id = 0; id < num_workers_; id++) {
|
||||
if (id != eof_worker_id_) {
|
||||
auto eof_buffer = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF);
|
||||
RETURN_IF_NOT_OK(local_queues_[id]->Add(std::move(eof_buffer)));
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
que_id = (que_id + 1) % num_workers_;
|
||||
}
|
||||
}
|
||||
|
@ -159,6 +173,14 @@ Status MapOp::WorkerEntry(int32_t worker_id) {
|
|||
continue;
|
||||
} else if (in_buffer->eof()) {
|
||||
// Calling base class EofReceived to forward eof buffer.
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
if (perf_mode_) {
|
||||
if (eof_worker_id_ == worker_id) {
|
||||
RETURN_IF_NOT_OK(EofReceived(worker_id));
|
||||
}
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
RETURN_IF_NOT_OK(EofReceived(worker_id));
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -193,6 +193,10 @@ class MapOp : public ParallelOp {
|
|||
// cause additional blocking because pop calls to Connector from the threads are synchronized to enforce the order.
|
||||
bool perf_mode_;
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
// The EOF worker id only applies in performance mode; it records the worker id of the queue that receives the EOF
|
||||
int32_t eof_worker_id_;
|
||||
#endif
|
||||
// Private function for worker/thread to loop continuously. It comprises the main
|
||||
// logic of MapOp: getting the data from previous Op, validating user specified column names,
|
||||
// applying a list of TensorOps to each of the data, process the results and then
|
||||
|
|
|
@ -64,14 +64,24 @@ class ParallelOp : public DatasetOp {
|
|||
return out;
|
||||
}
|
||||
|
||||
// During tree prepare phase, operators may have specific operations to perform depending on
|
||||
// During tree prepare phase, operators may have specific pre-operations to perform depending on
|
||||
// their role.
|
||||
// @notes Derived versions of this function should always call its superclass version first
|
||||
// before providing their own implementations.
|
||||
// @return Status - The error return code
|
||||
Status PrepareNodeAction() override {
|
||||
Status PrepareNodePreAction() override {
|
||||
// Run common code from super class before adding ParallelOp specific logic
|
||||
return (DatasetOp::PrepareNodeAction());
|
||||
return (DatasetOp::PrepareNodePreAction());
|
||||
}
|
||||
|
||||
// During tree prepare phase, operators may have specific post-operations to perform depending on
|
||||
// their role.
|
||||
// @notes Derived versions of this function should always call its superclass version first
|
||||
// before providing their own implementations.
|
||||
// @return Status - The error return code
|
||||
Status PrepareNodePostAction() override {
|
||||
// Run common code from super class before adding ParallelOp specific logic
|
||||
return (DatasetOp::PrepareNodePostAction());
|
||||
}
|
||||
|
||||
// Override base class reset to provide reset actions specific to the ParallelOp class.
|
||||
|
|
|
@ -64,13 +64,22 @@ class PipelineOp : public DatasetOp {
|
|||
// @return The number of threads that push data to the output connector
|
||||
int32_t num_producers() const override { return 1; }
|
||||
|
||||
// During tree prepare phase, operators may have specific operations to perform depending on
|
||||
// During tree prepare phase, operators may have specific pre-operations to perform depending on
|
||||
// their role.
|
||||
// @notes Derived versions of this function should always call its superclass version first
|
||||
// before providing their own implementations.
|
||||
Status PrepareNodeAction() override {
|
||||
Status PrepareNodePreAction() override {
|
||||
// Run common code from super class before adding PipelineOp specific logic
|
||||
return (DatasetOp::PrepareNodeAction());
|
||||
return (DatasetOp::PrepareNodePreAction());
|
||||
}
|
||||
|
||||
// During tree prepare phase, operators may have specific post-operations to perform depending on
|
||||
// their role.
|
||||
// @notes Derived versions of this function should always call its superclass version first
|
||||
// before providing their own implementations.
|
||||
Status PrepareNodePostAction() override {
|
||||
// Run common code from super class before adding PipelineOp specific logic
|
||||
return (DatasetOp::PrepareNodePostAction());
|
||||
}
|
||||
|
||||
protected:
|
||||
|
|
|
@ -58,10 +58,10 @@ void RepeatOp::Print(std::ostream &out, bool show_all) const {
|
|||
out << "RepeatOp:"
|
||||
<< "\nCurrent repeat count: " << repeat_count_ << "\nMax repeat count: " << max_repeats_
|
||||
<< "\nLeaf Nodes in my execution path:";
|
||||
if (!leaf_ops_.empty()) {
|
||||
if (!eoe_ops_.empty()) {
|
||||
out << "\n";
|
||||
for (size_t i = 0; i < leaf_ops_.size(); i++) {
|
||||
out << " Operator: " << leaf_ops_[i]->id() << "\n";
|
||||
for (size_t i = 0; i < eoe_ops_.size(); i++) {
|
||||
out << " Operator: " << eoe_ops_[i]->id() << "\n";
|
||||
}
|
||||
} else {
|
||||
out << " kNone.";
|
||||
|
@ -71,21 +71,17 @@ void RepeatOp::Print(std::ostream &out, bool show_all) const {
|
|||
|
||||
// Base-class override for executing specific RepeatOp configurations. This code will be called
|
||||
// during the execution tree prepare phase when it is visiting this operator.
|
||||
Status RepeatOp::PrepareNodeAction() {
|
||||
Status RepeatOp::PrepareNodePostAction() {
|
||||
// Run any common code from super class first before adding our own specific logic
|
||||
RETURN_IF_NOT_OK(PipelineOp::PrepareNodeAction());
|
||||
RETURN_IF_NOT_OK(PipelineOp::PrepareNodePostAction());
|
||||
std::shared_ptr<DatasetOp> leaf_op = tree_->PopFromRepeatStack();
|
||||
while (leaf_op != nullptr) {
|
||||
// Track the leaf operators that are under this repeat op.
|
||||
leaf_ops_.push_back(leaf_op);
|
||||
|
||||
// Special case. If the repeat count is 1, then pre-flag the leaf nodes
|
||||
// to tell them they are already at their last op:
|
||||
if (max_repeats_ == 1) {
|
||||
leaf_op->set_control_flag(kDeOpLastRepeat);
|
||||
}
|
||||
eoe_ops_.push_back(leaf_op);
|
||||
leaf_op = tree_->PopFromRepeatStack();
|
||||
}
|
||||
// Push ourselves to the stack in case one of our ascendants is repeat too.
|
||||
tree_->AddToRepeatStack(shared_from_this());
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -127,16 +123,20 @@ Status RepeatOp::GetNextBuffer(std::unique_ptr<DataBuffer> *p_buffer, int32_t wo
|
|||
Status RepeatOp::EoeReceived(int32_t worker_id) {
|
||||
repeat_count_++;
|
||||
MS_LOG(INFO) << "Repeat operator end of epoch message received. Repeat count is now: " << repeat_count_ << ".";
|
||||
|
||||
// If we've reached the requested repeat count, then flag the leaf nodes
|
||||
bool repeated = BitTest(op_ctrl_flags_, kDeOpRepeated);
|
||||
bool last_repeat = BitTest(op_ctrl_flags_, kDeOpLastRepeat);
|
||||
// If we've reached the requested repeat count, then flag the eoe nodes
|
||||
// to tell them they've got one more epoch to perform. When they reach the end
|
||||
// of the last epoch, they quit rather than loop again.
|
||||
if (max_repeats_ != kInfiniteRepeat && repeat_count_ == (max_repeats_ - 1)) {
|
||||
for (size_t i = 0; i < leaf_ops_.size(); i++) {
|
||||
leaf_ops_[i]->set_control_flag(kDeOpLastRepeat);
|
||||
// of the last epoch, they quit rather than loop again. This happens in two cases:
|
||||
// 1- We are also repeated (by another repeat op) and we are at the last repetition. Or,
|
||||
// 2- We are not repeated
|
||||
if (max_repeats_ != kInfiniteRepeat && repeat_count_ == (max_repeats_ - 1) && (!repeated || last_repeat)) {
|
||||
for (auto &eoe_op : eoe_ops_) {
|
||||
eoe_op->set_control_flag(kDeOpLastRepeat);
|
||||
}
|
||||
}
|
||||
if (repeat_count_ == max_repeats_) {
|
||||
repeat_count_ = 0;
|
||||
state_ = OpState::kDeOpIdle;
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -87,8 +87,8 @@ class RepeatOp : public PipelineOp {
|
|||
uint32_t PrepareFlags() const override;
|
||||
|
||||
// Base-class override for executing specific RepeatOp configurations. This code will be called
|
||||
// during the execution tree prepare phase when it is visiting this operator.
|
||||
Status PrepareNodeAction() override;
|
||||
// during the execution tree post-prepare phase when it is visiting this operator.
|
||||
Status PrepareNodePostAction() override;
|
||||
|
||||
// This function returns the buffer that is at the top of our output connector. The caller is
|
||||
// typically our parent node, when the parent is asking us to provide the next buffer of data.
|
||||
|
@ -119,9 +119,9 @@ class RepeatOp : public PipelineOp {
|
|||
int32_t num_producers() const override;
|
||||
|
||||
private:
|
||||
int32_t max_repeats_; // The number of repeats that the user requested
|
||||
int32_t repeat_count_; // A counter for the current number of executed repeats
|
||||
std::vector<std::shared_ptr<DatasetOp>> leaf_ops_; // List of leaf operators underneath this repeat.
|
||||
int32_t max_repeats_; // The number of repeats that the user requested
|
||||
int32_t repeat_count_; // A counter for the current number of executed repeats
|
||||
std::vector<std::shared_ptr<DatasetOp>> eoe_ops_; // List of operators that can generate EOE underneath this repeat.
|
||||
};
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -85,7 +85,11 @@ Status ShuffleOp::SelfReset() {
|
|||
if (!reshuffle_each_epoch_) {
|
||||
rng_ = std::mt19937_64(shuffle_seed_);
|
||||
} else {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
std::random_device random_device;
|
||||
#else
|
||||
std::random_device random_device("/dev/urandom");
|
||||
#endif
|
||||
std::uniform_int_distribution<int32_t> distribution(0, std::numeric_limits<int32_t>::max());
|
||||
shuffle_seed_ = distribution(random_device);
|
||||
rng_ = std::mt19937_64(shuffle_seed_);
@ -0,0 +1,132 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include <iostream>
|
||||
#include <utility>
|
||||
|
||||
#include "dataset/engine/data_buffer.h"
|
||||
#include "dataset/engine/datasetops/skip_op.h"
|
||||
#include "dataset/engine/db_connector.h"
|
||||
#include "dataset/engine/execution_tree.h"
|
||||
|
||||
#include "utils/log_adapter.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
// Builder constructor. Creates the builder object.
|
||||
SkipOp::Builder::Builder(int32_t count) : build_max_skips_(count) {}
|
||||
|
||||
Status SkipOp::Builder::SanityCheck() const {
|
||||
if (build_max_skips_ < 0) {
|
||||
std::string err_msg("Skip count must be positive integer or 0.");
|
||||
RETURN_STATUS_UNEXPECTED(err_msg);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// The builder "build" method creates the final object.
|
||||
Status SkipOp::Builder::Build(std::shared_ptr<SkipOp> *ptr) {
|
||||
RETURN_IF_NOT_OK(SanityCheck());
|
||||
*ptr = std::make_shared<SkipOp>(build_max_skips_);
|
||||
return Status::OK();
|
||||
}
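// Builder usage sketch (hedged; it mirrors how DEPipeline::ParseSkipOp drives this builder later in the patch):
//   std::shared_ptr<SkipOp> op;
//   Status rc = SkipOp::Builder(/*count=*/100).Build(&op);  // negative counts are rejected by SanityCheck()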
|
||||
|
||||
// Constructor of the SkipOp.
|
||||
SkipOp::SkipOp(int32_t count) : PipelineOp(0), max_skips_(count), skip_count_(0) {}
|
||||
|
||||
// Destructor
|
||||
SkipOp::~SkipOp() {}
|
||||
|
||||
// A print method typically used for debugging
|
||||
void SkipOp::Print(std::ostream &out, bool show_all) const {
|
||||
// Call base class printer first
|
||||
PipelineOp::Print(out, show_all);
|
||||
|
||||
// Then display our own stuff
|
||||
out << "SkipOp:"
|
||||
<< "\nCurrent skip count: " << skip_count_ << "\nMax skip count: " << max_skips_;
|
||||
}
|
||||
|
||||
// Since a buffer may contain multiple rows, this function drops the rows that
// still need to be skipped and then returns the buffer.
Status SkipOp::GetNextBuffer(std::unique_ptr<DataBuffer> *p_buffer, int32_t worker_id, bool retry_if_eoe) {
  if (child_.empty()) {
    RETURN_STATUS_UNEXPECTED("SkipOp can't be the leaf node.");
  }

  std::unique_ptr<DataBuffer> buf;
  // Drop the first max_skips_ rows
  while (skip_count_ < max_skips_) {
    RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf, worker_id, true));
    if (buf->eoe() || buf->eof()) {
      break;
    }

    // The buffer may hold more than one row
    TensorRow drop_row;
    int row_num = buf->NumRows();
    for (int i = 0; i < row_num; i++) {
      RETURN_IF_NOT_OK(buf->PopRow(&drop_row));
      if (++skip_count_ == max_skips_) {
        break;
      }
    }
  }

  // If the buffer is null or holds no rows,
  // get a new buffer from the child.
  if (!buf || buf->NumRows() == 0) {
    if (buf && buf->eof()) {
      *p_buffer = std::move(buf);
      return Status::OK();
    }
    RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf, worker_id, true));
  }

  // Handle eoe and eof
  if (buf->eoe() || buf->eof()) {
    RETURN_IF_NOT_OK(EoeReceived(worker_id));
    if (state_ == OpState::kDeOpIdle) {
      *p_buffer = std::move(buf);
      return Status::OK();
    }
  }

  *p_buffer = std::move(buf);
  return Status::OK();
}

// Base-class override for handling cases when an eoe is received.
Status SkipOp::EoeReceived(int32_t worker_id) {
  skip_count_ = 0;
  state_ = OpState::kDeOpIdle;
  return Status::OK();
}

// Class functor operator () override.
// Most dataset ops operate by launching a thread (see ExecutionTree).
// However, the SkipOp is defined as an inlined operator, so it is invalid to
// launch the functor since this op runs inlined inside another operator. The
// function is overloaded to ensure that it is not called by mistake (it will
// generate an error).
Status SkipOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. SkipOp is an inlined operator."); }

// Base-class override for handling cases when an eof is received.
Status SkipOp::EofReceived(int32_t worker_id) {
  MS_LOG(INFO) << "Skip operator EOF received, do nothing now.";
  return Status::OK();
}
}  // namespace dataset
}  // namespace mindspore

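For readers skimming the new SkipOp::GetNextBuffer() above, here is a toy, self-contained sketch of the same skip logic outside MindSpore (Row and Buffer are stand-ins for TensorRow and DataBuffer, not real API): the first max_skips rows coming from upstream are dropped even when a fetched buffer carries several rows, and everything after that is passed through unchanged.

#include <deque>
#include <iostream>
#include <vector>

using Row = int;                  // stand-in for TensorRow
using Buffer = std::vector<Row>;  // stand-in for DataBuffer

int main() {
  // Upstream produces three buffers holding rows 0..7.
  std::deque<Buffer> upstream = {{0, 1, 2}, {3, 4, 5}, {6, 7}};
  const int max_skips = 4;  // plays the role of max_skips_
  int skip_count = 0;       // plays the role of skip_count_

  while (!upstream.empty()) {
    Buffer buf = upstream.front();
    upstream.pop_front();

    // Drop rows from the front of the buffer while skips remain.
    while (skip_count < max_skips && !buf.empty()) {
      buf.erase(buf.begin());
      ++skip_count;
    }
    // Whatever survives is forwarded to the parent operator.
    for (Row r : buf) {
      std::cout << r << ' ';
    }
  }
  std::cout << '\n';  // prints: 4 5 6 7
  return 0;
}

The real operator additionally has to forward EOE/EOF control buffers untouched and reset skip_count_ in EoeReceived() so that the next epoch skips again from the start.
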
@ -0,0 +1,95 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef DATASET_ENGINE_DATASETOPS_SKIP_OP_H_
#define DATASET_ENGINE_DATASETOPS_SKIP_OP_H_

#include <memory>
#include <string>
#include <vector>
#include "dataset/engine/datasetops/pipeline_op.h"

namespace mindspore {
namespace dataset {
class SkipOp : public PipelineOp {
 public:
  class Builder {
   public:
    // Builder constructor. Creates the builder object.
    // @note No default args
    // @param count - The number of skips to do
    // @return This is a constructor.
    explicit Builder(int32_t count);

    // Default destructor
    ~Builder() = default;

    // The builder "build" method creates the final object.
    // @return shared_ptr to the new SkipOp object
    Status Build(std::shared_ptr<SkipOp> *);

   private:
    int32_t build_max_skips_;

    Status SanityCheck() const;
  };

  // Constructor of the SkipOp.
  // @note The builder class should be used to call it
  // @param count - The number of skips to do
  explicit SkipOp(int32_t count);

  // Destructor
  ~SkipOp();

  // A print method typically used for debugging
  // @param out - The output stream to write output to
  // @param show_all - A bool to control if you want to show all info or just a summary
  void Print(std::ostream &out, bool show_all) const override;

  // Class functor operator () override.
  // Most dataset ops operate by launching a thread (see ExecutionTree).
  // However, the SkipOp is defined as an inlined operator, so it is invalid to launch the
  // functor since this op runs inlined inside another operator. The function is overloaded to
  // ensure that it is not called by mistake (it will generate an error).
  // @return Status - The error code return
  Status operator()() override;

  // This function returns the buffer that is at the top of our output connector. The caller is
  // typically our parent node, when the parent is asking us to provide the next buffer of data.
  // Since SkipOp is an inlined op, getting a buffer from us will simply bounce you to get
  // a buffer from our child.
  // @param p_buffer - output pointer to the buffer that it will fetch.
  // @param worker_id - The worker id
  // @param retry_if_eoe - Set this flag to true to allow calling pop() again after the first pop() returns EOE.
  // @return Status - The error code return
  Status GetNextBuffer(std::unique_ptr<DataBuffer> *p_buffer, int32_t worker_id, bool retry_if_eoe) override;

  // Base-class override for handling cases when an eoe is received.
  // @param worker_id - The worker id
  Status EoeReceived(int32_t worker_id) override;

  // Base-class override for handling cases when an eof is received.
  // @param worker_id - The worker id
  Status EofReceived(int32_t worker_id) override;

 private:
  int32_t max_skips_;   // The number of skips that the user requested
  int32_t skip_count_;  // A counter for the current number of executed skips
};
}  // namespace dataset
}  // namespace mindspore

#endif  // DATASET_ENGINE_DATASETOPS_SKIP_OP_H_

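A minimal usage sketch of the Builder declared above, under the assumption that skip_op.h (as added in this commit) transitively provides Status and the RETURN_IF_NOT_OK macro, as it does for the other dataset ops; the helper name MakeSkipOp is illustrative only, and wiring the op into an ExecutionTree is omitted:

#include <memory>

#include "dataset/engine/datasetops/skip_op.h"

namespace mindspore {
namespace dataset {
// Illustrative helper: build a SkipOp that drops the first `count` rows of its child.
Status MakeSkipOp(int32_t count, std::shared_ptr<SkipOp> *out) {
  SkipOp::Builder builder(count);        // count must be >= 0, enforced by SanityCheck()
  RETURN_IF_NOT_OK(builder.Build(out));  // fills *out with the new SkipOp
  return Status::OK();
}
}  // namespace dataset
}  // namespace mindspore
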
@ -20,4 +20,4 @@ add_library(engine-datasetops-source OBJECT
    celeba_op.cc
    )

add_dependencies(engine-datasetops-source protobuf::libprotobuf)
add_dependencies(engine-datasetops-source mindspore::protobuf)

@ -100,7 +100,7 @@ Status CelebAOp::LaunchThreadsAndInitOp() {
  RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&CelebAOp::WorkerEntry, this, std::placeholders::_1)));
  TaskManager::FindMe()->Post();
  RETURN_IF_NOT_OK(ParseImageAttrInfo());
  RETURN_IF_NOT_OK(sampler_->Init(this));
  RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(this));

  return Status::OK();
}

@ -240,7 +240,7 @@ Status CifarOp::Reset() {

// hand shake with Sampler, allow Sampler to call RandomAccessOp's functions to get NumRows
Status CifarOp::InitSampler() {
  RETURN_IF_NOT_OK(sampler_->Init(this));
  RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(this));
  return Status::OK();
}

@ -258,7 +258,7 @@ Status ImageFolderOp::Reset() {

// hand shake with Sampler, allow Sampler to call RandomAccessOp's functions to get NumRows
Status ImageFolderOp::InitSampler() {
  RETURN_IF_NOT_OK(sampler_->Init(this));
  RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(this));
  return Status::OK();
}

@ -254,7 +254,7 @@ Status ManifestOp::Reset() {

// hand shake with Sampler, allow Sampler to call RandomAccessOp's functions to get NumRows
Status ManifestOp::InitSampler() {
  RETURN_IF_NOT_OK(sampler_->Init(this));
  RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(this));
  return Status::OK();
}

@ -205,7 +205,7 @@ Status MnistOp::Reset() {

// hand shake with Sampler, allow Sampler to call RandomAccessOp's functions to get NumRows
Status MnistOp::InitSampler() {
  RETURN_IF_NOT_OK(sampler_->Init(this));
  RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(this));
  return Status::OK();
}

Some files were not shown because too many files have changed in this diff.